Spaces:
Running
on
Zero
Running
on
Zero
| import os.path as osp | |
| import os | |
| import sys | |
| import json | |
| import itertools | |
| import time | |
| from collections import deque | |
| import torch | |
| import tqdm | |
| import concurrent.futures | |
| import psutil | |
| import io | |
| import cv2 | |
| from PIL import Image | |
| import numpy as np | |
| from models.SpaTrackV2.models.utils import matrix_to_quaternion | |
| from models.SpaTrackV2.datasets.base_sfm_dataset import BaseSfMViewDataset | |
| from models.SpaTrackV2.models.utils import ( | |
| camera_to_pose_encoding, pose_encoding_to_camera | |
| ) | |
| from models.SpaTrackV2.datasets.dataset_util import ( | |
| imread_cv2, npz_loader, read_video,npy_loader,resize_crop_video | |
| ) | |
| from models.SpaTrackV2.models.camera_transform import normalize_cameras | |
| from models.SpaTrackV2.datasets.dataset_util import imread_cv2, npz_loader | |
def bytes_to_gb(bytes):
    """Convert a size in bytes to gibibytes (1 GiB = 1024**3 bytes)."""
    bytes_per_gib = 1024 ** 3
    return bytes / bytes_per_gib
def get_total_size(obj, seen=None):
    """Recursively estimate the memory footprint of ``obj`` in bytes.

    The estimate is ``sys.getsizeof(obj)`` plus the sizes of everything
    reachable from it: keys and values of dicts, the ``__dict__`` of objects
    that have one, and the items of other iterables.

    Args:
        obj: Object to measure.
        seen: Set of ``id()`` values already counted; used internally so that
            shared or cyclic references are only counted once.

    Returns:
        Estimated size in bytes (0 if ``obj`` was already counted).
    """
    from collections.abc import Iterator

    if seen is None:
        seen = set()
    obj_id = id(obj)
    if obj_id in seen:
        return 0
    seen.add(obj_id)

    size = sys.getsizeof(obj)
    if isinstance(obj, dict):
        size += sum(get_total_size(value, seen) for value in obj.values())
        size += sum(get_total_size(key, seen) for key in obj)
    elif hasattr(obj, '__dict__'):
        size += get_total_size(obj.__dict__, seen)
    elif hasattr(obj, '__iter__') and not isinstance(
            obj, (str, bytes, bytearray, Iterator)):
        # One-shot iterators/generators are deliberately not traversed:
        # walking them would exhaust them, so measuring would mutate the
        # object being measured. Only their shallow size is counted.
        size += sum(get_total_size(item, seen) for item in obj)
    return size
class GTAV(BaseSfMViewDataset):
    """GTAV video dataset that yields RGB-D clips with camera poses.

    Every sub-directory of ``ROOT`` is treated as one scene. ``_get_views``
    reads frames from ``<scene>/images``, depth maps from ``<scene>/depths``
    (``.exr``) and per-frame camera JSON files from ``<scene>/poses``.
    """

    def __init__(self, mask_bg=False, scene_st=None, scene_end=None,
                 debug=False, *args, ROOT, **kwargs):
        """Index the scene directories found under ``ROOT``.

        Args:
            mask_bg: One of ``True``, ``False`` or ``'rand'``; stored on the
                instance.
            scene_st, scene_end, debug: Accepted for interface compatibility;
                not used by this class.
            ROOT: Directory that contains one sub-directory per scene.
            *args, **kwargs: Forwarded to ``BaseSfMViewDataset.__init__``.
        """
        self.ROOT = ROOT
        super().__init__(*args, **kwargs)
        assert mask_bg in (True, False, 'rand')
        self.mask_bg = mask_bg
        self.dataset_label = 'GTAV'

        # os.listdir returns entries in arbitrary order; sort so that the
        # idx -> scene mapping is the same on every run and every machine.
        scene_dirs = (os.path.join(self.ROOT, name)
                      for name in sorted(os.listdir(self.ROOT)))
        self.scene_list = [path for path in scene_dirs if os.path.isdir(path)]

    def __len__(self):
        """Return the number of scenes."""
        return len(self.scene_list)

    # The path helpers below use the plain integer format ``06d`` so that the
    # generated file names never depend on the process locale.
    def _get_metadatapath(self, obj, instance, view_idx):
        """Return the ``.npz`` metadata path for one frame."""
        return osp.join(self.ROOT, obj, instance, 'images', f'frame{view_idx:06d}.npz')

    def _get_impath(self, obj, instance, view_idx):
        """Return the ``.jpg`` image path for one frame."""
        return osp.join(self.ROOT, obj, instance, 'images', f'frame{view_idx:06d}.jpg')

    def _get_depthpath(self, obj, instance, view_idx):
        """Return the depth-map path for one frame."""
        return osp.join(self.ROOT, obj, instance, 'depths', f'frame{view_idx:06d}.jpg.geometric.png')

    def _get_maskpath(self, obj, instance, view_idx):
        """Return the mask path for one frame."""
        return osp.join(self.ROOT, obj, instance, 'masks', f'frame{view_idx:06d}.png')

    def _read_depthmap(self, depthpath, input_metadata=None):
        """Read a depth map unchanged from disk and cast it to float32.

        ``input_metadata`` is accepted but not used.
        """
        depthmap = imread_cv2(depthpath, cv2.IMREAD_UNCHANGED)
        return depthmap.astype(np.float32)

    def _load_gtav_frame(self, scene_root, img_name):
        """Load image, depth, pose matrix and intrinsics for a single frame.

        Depth and pose files are located by swapping the image file's
        extension (``<stem>.exr`` / ``<stem>.json``) inside the sibling
        ``depths`` / ``poses`` directories.

        Returns:
            Tuple ``(rgb, depth, extr, intr)`` where ``extr`` is the inverse of
            the JSON ``"extrinsic"`` matrix with its translation divided by 10
            and ``intr`` is a 3x3 float32 matrix built from
            ``f_x, f_y, c_x, c_y``.
        """
        stem = os.path.splitext(img_name)[0]
        img_path = os.path.join(scene_root, "images", img_name)
        depth_path = os.path.join(scene_root, "depths", stem + ".exr")
        pose_path = os.path.join(scene_root, "poses", stem + ".json")

        rgb = imread_cv2(img_path)
        depth = imread_cv2(depth_path, cv2.IMREAD_UNCHANGED)
        # Replace infinite depth by a large finite value.
        depth[np.isinf(depth)] = 1e16

        with open(pose_path, "r") as f:
            pose_data = json.load(f)
        extr = np.linalg.inv(np.array(pose_data["extrinsic"]))
        # NOTE(review): translation is rescaled by a fixed factor of 10;
        # presumably a unit conversion for this dataset -- confirm.
        extr[:3, 3] /= 10
        fx, fy = pose_data["f_x"], pose_data["f_y"]
        cx, cy = pose_data["c_x"], pose_data["c_y"]
        intr = np.float32([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
        return rgb, depth, extr, intr

    def _get_views(self, idx, resolution, rng):
        """Sample a clip of ``self.num_views`` frames from scene ``idx``.

        Args:
            idx: Index into ``self.scene_list``.
            resolution: Sequence whose first element is the target size passed
                to ``resize_crop_video`` and used to normalise the focal length.
            rng: Accepted for interface compatibility; sampling below uses the
                global ``np.random`` state instead.

        Returns:
            dict with keys ``rgbs``, ``depths`` (depth divided by the camera
            radius), ``pose_enc`` (translation, quaternion, focal),
            ``traj_mat`` (poses relative to the first frame), ``intrs``,
            ``traj_3d`` / ``vis`` (all-zero placeholders), ``syn_real``,
            ``metric_rel`` and ``data_dir``.
        """
        scene_root = self.scene_list[idx]
        images_pool = np.array(sorted(os.listdir(os.path.join(scene_root, "images"))))
        num_frames = len(images_pool)

        # Temporal stride between picked frames.
        # NOTE(review): int(uniform(2, 3)) always truncates to 2 -- confirm
        # whether a random stride was intended.
        stride = int(np.random.uniform(2, 3))
        # NOTE(review): scenes much shorter than stride * num_views frames
        # will raise IndexError when indexing images_pool below.
        start = np.random.choice(
            np.arange(0, max(num_frames - stride * self.num_views, 1)))
        idxs = np.arange(start, start + stride * self.num_views, stride)
        images_pick = images_pool[idxs]

        rgbs, depths, extrinsics, intrinsics = [], [], [], []
        for img_name in images_pick:
            rgb, depth, extr, intr = self._load_gtav_frame(scene_root, img_name)
            rgbs.append(rgb)
            depths.append(depth)
            extrinsics.append(extr)
            intrinsics.append(intr)

        # Reverse the channel order of the stacked images.
        rgbs = np.stack(rgbs, axis=0)[..., [2, 1, 0]]
        depths = np.stack(depths, axis=0)
        extrinsics = np.stack(extrinsics, axis=0)
        intrinsics = np.stack(intrinsics, axis=0)

        # crop and resize
        rgbs, depths, Intrs = resize_crop_video(rgbs, depths, intrinsics, resolution[0])

        # encode the camera poses
        camera_poses = torch.from_numpy(extrinsics)  # NOTE: C2W
        focal0 = Intrs[:, 0, 0] / resolution[0]
        focal1 = Intrs[:, 1, 1] / resolution[0]
        focal = (focal0.unsqueeze(1) + focal1.unsqueeze(1)) / 2

        # first frame normalize: express every pose relative to frame 0
        camera_poses = torch.inverse(camera_poses[:1]) @ camera_poses
        # Scale translations by the largest camera distance from frame 0;
        # near-static clips keep their original scale.
        Radius = camera_poses[:, :3, 3].norm(dim=1).max()
        if Radius < 1e-2:
            Radius = 1
        camera_poses[:, :3, 3] = camera_poses[:, :3, 3] / Radius

        R = camera_poses[:, :3, :3]
        t = camera_poses[:, :3, 3]
        rot_vec = matrix_to_quaternion(R)
        pose_enc = torch.cat([t, rot_vec, focal], dim=1)

        depth_cano = depths / Radius
        # NaN never compares equal to anything (including itself), so an
        # explicit isnan mask is required to zero invalid depths.
        depth_cano[torch.isnan(depth_cano)] = 0

        # No point tracks are provided here; emit zero placeholders.
        traj_3d = torch.zeros(self.num_views, self.track_num, 3)
        vis = torch.zeros(self.num_views, self.track_num)
        syn_real = torch.tensor([0])
        metric_rel = torch.tensor([1])

        return dict(
            rgbs=rgbs,
            depths=depth_cano,
            pose_enc=pose_enc,
            traj_mat=camera_poses,
            intrs=Intrs,
            traj_3d=traj_3d,
            vis=vis,
            syn_real=syn_real,
            metric_rel=metric_rel,
            data_dir=scene_root,
        )
if __name__ == "__main__":
    # Manual smoke test: build the dataset, fetch one clip from scene 0,
    # write a 4D visualisation, then drop into the debugger for inspection.
    from models.videocam.datasets.base_sfm_dataset import view_name
    from functools import partial

    DATA_DIR = "/nas3/zsz/GTAV_540"
    dataset = GTAV(
        split='train',
        ROOT=DATA_DIR,
        resolution=518,
        aug_crop=16,
        num_views=16,
    )
    rng = np.random.default_rng(seed=0)
    data_ret = dataset._get_views(0, (518, 518), rng)

    from models.videocam.datasets.vis3d_check import vis4d
    out_dir = "/home/xyx/home/codes/SpaTrackerV2/vis_results/test"
    vis4d(
        data_ret["rgbs"],
        data_ret["depths"],
        data_ret["traj_mat"],
        data_ret["intrs"],
        workspace=out_dir,
    )
    import pdb
    pdb.set_trace()