import os.path as osp
import os
import sys
import json
import itertools
import time
from collections import deque
import torch
import tqdm
import concurrent.futures
import psutil
import io
import cv2
from PIL import Image
import numpy as np
from models.SpaTrackV2.models.utils import matrix_to_quaternion
from models.SpaTrackV2.datasets.base_sfm_dataset import BaseSfMViewDataset
from models.SpaTrackV2.models.utils import (
    camera_to_pose_encoding, pose_encoding_to_camera
)
from models.SpaTrackV2.datasets.dataset_util import (
    imread_cv2, npz_loader, read_video, npy_loader, resize_crop_video
)
from models.SpaTrackV2.models.camera_transform import normalize_cameras
def bytes_to_gb(num_bytes):
    # convert a raw byte count into gigabytes
    return num_bytes / (1024 ** 3)
def get_total_size(obj, seen=None):
    # recursively estimate the memory footprint of an object (in bytes),
    # tracking visited ids to avoid double-counting shared references
    size = sys.getsizeof(obj)
    if seen is None:
        seen = set()
    obj_id = id(obj)
    if obj_id in seen:
        return 0
    seen.add(obj_id)
    if isinstance(obj, dict):
        size += sum(get_total_size(v, seen) for v in obj.values())
        size += sum(get_total_size(k, seen) for k in obj.keys())
    elif hasattr(obj, '__dict__'):
        size += get_total_size(obj.__dict__, seen)
    elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
        size += sum(get_total_size(i, seen) for i in obj)
    return size
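# Usage sketch (illustrative only, with a hypothetical metadata file):
#   meta = dict(np.load("scene_metadata_dslr.npz", allow_pickle=True).items())
#   print(f"metadata footprint: {bytes_to_gb(get_total_size(meta)):.3f} GB")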
class ScanNetpp(BaseSfMViewDataset):
    def __init__(self, mask_bg=False, scene_st=None, scene_end=None,
                 debug=False, *args, ROOT, **kwargs):
        self.ROOT = ROOT
        super().__init__(*args, **kwargs)
        assert mask_bg in (True, False, 'rand')
        self.mask_bg = mask_bg
        self.dataset_label = 'ScanNetpp'
        # load all scene directories under ROOT
        self.scene_list = os.listdir(self.ROOT)
        self.scene_list = [
            os.path.join(self.ROOT, dir_i) for dir_i in self.scene_list
            if os.path.isdir(os.path.join(self.ROOT, dir_i))
        ]

    def __len__(self):
        return len(self.scene_list)
    def _get_metadatapath(self, obj, instance, view_idx):
        return osp.join(self.ROOT, obj, instance, 'images', f'frame{view_idx:06n}.npz')

    def _get_impath(self, obj, instance, view_idx):
        return osp.join(self.ROOT, obj, instance, 'images', f'frame{view_idx:06n}.jpg')

    def _get_depthpath(self, obj, instance, view_idx):
        return osp.join(self.ROOT, obj, instance, 'depths', f'frame{view_idx:06n}.jpg.geometric.png')

    def _get_maskpath(self, obj, instance, view_idx):
        return osp.join(self.ROOT, obj, instance, 'masks', f'frame{view_idx:06n}.png')

    def _read_depthmap(self, depthpath, input_metadata=None):
        depthmap = imread_cv2(depthpath, cv2.IMREAD_UNCHANGED)
        depthmap = depthmap.astype(np.float32)
        return depthmap
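    # Assumed on-disk layout (inferred from the paths used below, not verified):
    #   <ROOT>/<scene_id>/scene_metadata_dslr.npz   packed poses/intrinsics/image names
    #   <ROOT>/<scene_id>/images/DSC*.jpg           DSLR RGB frames
    #   <ROOT>/<scene_id>/depth/DSC*.png            uint16 depth in millimeters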
    def _get_views(self, idx, resolution, rng):
        scene_root = self.scene_list[idx]
        meta_dir = os.path.join(scene_root, "scene_metadata_dslr.npz")
        meta_data = dict(np.load(meta_dir, allow_pickle=True).items())
        # keep only DSLR frames (names starting with 'DSC'), then sort by name
        new_ids = [i for i in range(len(meta_data["images"]))
                   if meta_data["images"][i].startswith('DSC')]
        meta_data["trajectories"] = meta_data["trajectories"][new_ids]
        meta_data["intrinsics"] = meta_data["intrinsics"][new_ids]
        meta_data["images"] = meta_data["images"][new_ids]
        order_new = np.argsort(meta_data["images"])
        meta_data["trajectories"] = meta_data["trajectories"][order_new]
        meta_data["intrinsics"] = meta_data["intrinsics"][order_new]
        meta_data["images"] = meta_data["images"][order_new]
        T = len(meta_data["images"])
        # sample a temporal window with a random start (note: uses the global
        # np.random state rather than the rng passed in)
        scale_num = int(np.random.uniform(1, 1))  # always 1; placeholder for a random stride
        start = np.random.choice(np.arange(0, max(T - scale_num * self.num_views, 1)))
        idxs = np.arange(start, start + scale_num * self.num_views, scale_num).clip(0, T - 1)
        # gather the camera poses and intrinsics for the sampled frames
        meta_images = meta_data["images"][idxs]
        extrs = meta_data["trajectories"][idxs]
        intrs = meta_data["intrinsics"][idxs]
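        # Worked example (hypothetical numbers): with T = 100 frames,
        # num_views = 16 and stride 1, `start` is drawn from [0, 84) and
        # `idxs` covers 16 consecutive frames; the .clip(0, T - 1) guard
        # only matters for scenes shorter than the requested window.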
        rgbs = []
        depths = []
        # read the sampled frames and their depth maps
        for i, idx_i in enumerate(idxs):
            idx_i = int(idx_i)
            img_name = meta_images[i][:-4]
            img_path = os.path.join(scene_root, "images", img_name + ".jpg")
            depth_path = os.path.join(scene_root, "depth", img_name + ".png")
            img = imread_cv2(img_path)
            depth = imread_cv2(depth_path, cv2.IMREAD_UNCHANGED)
            depth = depth.astype(np.float32) / 1000  # millimeters -> meters
            rgbs.append(img)
            depths.append(depth)
        rgbs = np.stack(rgbs, axis=0)
        depths = np.stack(depths, axis=0)
        # rgbs = rgbs[..., [2, 1, 0]]  # optional BGR -> RGB swap, left disabled
        T, H, W, _ = rgbs.shape
        # placeholder tracks: this dataset provides no 2D/3D trajectories
        intrinsics = intrs
        extrinsics = extrs
        traj_2d = np.zeros((self.num_views, self.track_num, 2))
        traj_3d = np.zeros((self.num_views, self.track_num, 3))
        vis = np.zeros((self.num_views, self.track_num))
        poses = extrinsics
        traj_2d = torch.from_numpy(traj_2d)
        traj_3d = torch.from_numpy(traj_3d)
        vis = torch.from_numpy(vis)
        # crop and resize the frames; intrinsics are rescaled to match
        rgbs, depths, Intrs = resize_crop_video(rgbs, depths, intrinsics, resolution[0])
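        # Assumption (not verified here): resize_crop_video returns torch
        # tensors, with rgbs of shape (T, 3, H', W'), depths of shape
        # (T, H', W'), and Intrs of shape (T, 3, 3) adjusted for the crop.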
        # encode the camera poses
        Extrs = torch.from_numpy(poses)
        camera_poses = Extrs  # NOTE: camera-to-world (C2W)
        focal0 = Intrs[:, 0, 0] / resolution[0]
        focal1 = Intrs[:, 1, 1] / resolution[0]
        focal = (focal0.unsqueeze(1) + focal1.unsqueeze(1)) / 2
        # re-anchor all poses to the first frame, so frame 0 becomes the identity
        camera_poses = torch.inverse(camera_poses[:1]) @ camera_poses
        T_center = camera_poses[:, :3, 3].mean(dim=0)
        Radius = camera_poses[:, :3, 3].norm(dim=1).max()
        # scale normalization is disabled: the radius is fixed to 1 so the
        # metric scale of the translations is preserved
        Radius = 1
        camera_poses[:, :3, 3] = camera_poses[:, :3, 3] / Radius
        R = camera_poses[:, :3, :3]
        t = camera_poses[:, :3, 3]
        rot_vec = matrix_to_quaternion(R)
        pose_enc = torch.cat([t, rot_vec, focal], dim=1)
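        # pose_enc layout (assuming matrix_to_quaternion yields 4 components):
        #   [tx, ty, tz, qw, qx, qy, qz, f] -> an 8-D encoding per frame,
        # with the focal length normalized by the target resolution above.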
        # depth_cano = Radius * focal[:, :, None, None] / depths.clamp(min=1e-6)
        depth_cano = depths / Radius
        # `depth_cano == torch.nan` is always False; use torch.isnan instead
        depth_cano[torch.isnan(depth_cano)] = 0
        traj_3d = torch.zeros(self.num_views, self.track_num, 3)
        vis = torch.zeros(self.num_views, self.track_num)
        syn_real = torch.tensor([0])    # real capture, not synthetic
        metric_rel = torch.tensor([1])  # depth is metric (meters)
        data_dir = scene_root
        views = dict(
            rgbs=rgbs,
            depths=depth_cano,
            pose_enc=pose_enc,
            traj_mat=camera_poses,
            intrs=Intrs,
            traj_3d=traj_3d,
            vis=vis,
            syn_real=syn_real,
            metric_rel=metric_rel,
            data_dir=data_dir,
        )
        return views
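    # Expected shapes of the returned dict, under the assumptions above
    # (T = num_views, N = track_num): rgbs (T, 3, H, W), depths (T, H, W),
    # pose_enc (T, 8), traj_mat (T, 4, 4), intrs (T, 3, 3),
    # traj_3d (T, N, 3), vis (T, N).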
if __name__ == "__main__":
    from models.videocam.datasets.base_sfm_dataset import view_name
    from functools import partial
    # from dust3r.viz import SceneViz, auto_cam_size
    # from dust3r.utils.image import rgb

    DATA_DIR = "/data0/xyx/scannetpp_processed_v3"
    cpu_num_total = int(sys.argv[1]) if len(sys.argv) > 1 else 8
    cpu_num_per = int(sys.argv[2]) if len(sys.argv) > 2 else 8

    dataset = ScanNetpp(split='train', ROOT=DATA_DIR, resolution=518, aug_crop=16, num_views=16)
    rng = np.random.default_rng(seed=0)
    view = dataset._get_views(200, (518, 518), rng)
    rng = np.random.default_rng(seed=0)
    data_ret = dataset._get_views(0, (518, 518), rng)

    from models.videocam.datasets.vis3d_check import vis4d
    vis4d(data_ret["rgbs"], data_ret["depths"],
          data_ret["traj_mat"], data_ret["intrs"],
          workspace="/home/xyx/home/codes/SpaTrackerV2/vis_results/test")
    import pdb; pdb.set_trace()