Spaces:
Running
on
Zero
Running
on
Zero
| import os.path as osp | |
| import os | |
| import sys | |
| import json | |
| import itertools | |
| import time | |
| from collections import deque | |
| import torch | |
| import tqdm | |
| import concurrent.futures | |
| import psutil | |
| import io | |
| import cv2 | |
| from PIL import Image | |
| import numpy as np | |
| from scipy.ndimage import median_filter | |
| from models.SpaTrackV2.datasets.base_sfm_dataset import BaseSfMViewDataset | |
| from models.SpaTrackV2.models.utils import ( | |
| camera_to_pose_encoding, pose_encoding_to_camera | |
| ) | |
| from models.SpaTrackV2.datasets.dataset_util import ( | |
| imread_cv2, npz_loader, read_video,npy_loader,resize_crop_video, filter_video_depth | |
| ) | |
| from models.SpaTrackV2.models.utils import matrix_to_quaternion | |
| from models.SpaTrackV2.models.camera_transform import normalize_cameras | |
| from models.SpaTrackV2.datasets.dataset_util import imread_cv2, npz_loader | |
| from models.SpaTrackV2.datasets.dust3r_utils.utils import colmap_to_opencv_intrinsics | |
| import glob | |
def bytes_to_gb(bytes):
    """Convert a byte count to gibibytes (1 GiB = 1024 ** 3 bytes).

    Args:
        bytes: Number of bytes (any numeric type).

    Returns:
        The same quantity expressed in GiB, as a float.
    """
    bytes_per_gib = 1024 ** 3
    return bytes / bytes_per_gib
def get_total_size(obj, seen=None):
    """Recursively estimate the memory footprint of ``obj`` in bytes.

    The estimate is ``sys.getsizeof(obj)`` plus the sizes of everything
    reachable through dict keys/values, an instance ``__dict__``, or
    iteration. Each distinct object (by ``id``) is counted only once.

    Args:
        obj: Object to measure.
        seen: Set of ``id`` values already accounted for; callers normally
            leave this as ``None``.

    Returns:
        Size in bytes, or ``0`` when ``obj`` has already been counted.
    """
    own_size = sys.getsizeof(obj)
    visited = set() if seen is None else seen

    # Shared references and reference cycles must not be counted twice.
    key = id(obj)
    if key in visited:
        return 0
    visited.add(key)

    if isinstance(obj, dict):
        nested = sum(get_total_size(value, visited) for value in obj.values())
        nested += sum(get_total_size(name, visited) for name in obj.keys())
        return own_size + nested

    if hasattr(obj, '__dict__'):
        return own_size + get_total_size(obj.__dict__, visited)

    # Strings and byte buffers are iterable but are treated as leaves.
    if hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
        return own_size + sum(get_total_size(item, visited) for item in obj)

    return own_size
def depth_read(depth_file):
    """Load a depth map with ``np.load`` and zero out unreliable samples.

    A sample is set to ``0`` when it is

    * non-finite (NaN, +inf or -inf),
    * at or above the 95th percentile of the cleaned map, or
    * at least 200 units above the 5th percentile of the valid samples.

    Args:
        depth_file: Path or file-like object accepted by ``np.load``.

    Returns:
        The loaded array, modified in place, with rejected samples set to 0.
    """
    depth = np.load(depth_file)

    # ``depth == np.nan`` is always False (NaN never compares equal), so the
    # invalid samples have to be located with ``np.isfinite`` instead.
    invalid = ~np.isfinite(depth)
    valid_values = depth[~invalid]
    if valid_values.size == 0:
        # Nothing usable: there is no meaningful percentile to compute.
        depth[invalid] = 0
        return depth

    # The lower percentile is taken over valid samples only; a single NaN
    # would otherwise turn the percentile (and every comparison) into NaN.
    depth_min = np.percentile(valid_values, 5)
    depth[invalid] = 0

    # The upper percentile is taken over the cleaned map (zeros included).
    depth_max = np.percentile(depth, 95)
    depth[depth >= depth_max] = 0
    depth[depth >= depth_min + 200] = 0
    return depth
def read_camera_parameters(filename):
    """Parse a text camera file into intrinsic and extrinsic matrices.

    The extrinsic matrix is read from lines 1-4 (0-based) and the intrinsic
    matrix from lines 7-9; all other lines are ignored.

    Args:
        filename: Path of the camera text file.

    Returns:
        ``(intrinsics, extrinsics)``: a 3x3 and a 4x4 ``float32`` array.
    """
    def _parse_square(rows, side):
        # Join the rows so the whole matrix is parsed as one flat number list.
        flat = np.fromstring(' '.join(rows), dtype=np.float32, sep=' ')
        return flat.reshape((side, side))

    with open(filename) as handle:
        stripped = [raw.rstrip() for raw in handle.readlines()]

    # extrinsics: lines [1, 5), 4x4 matrix
    extrinsics = _parse_square(stripped[1:5], 4)
    # intrinsics: lines [7, 10), 3x3 matrix
    intrinsics = _parse_square(stripped[7:10], 3)
    return intrinsics, extrinsics
class EgoData(BaseSfMViewDataset):
    """Video dataset in which every sub-directory of ``ROOT`` is one scene.

    ``_get_views`` reads, per scene:

    * ``rgbs/*.jpg``   - colour frames,
    * ``depths/*.png`` - depth maps (divided by 1000 on load),
    * ``meta.npz``     - the arrays ``"c2w"`` (one 4x4 matrix per frame) and
      ``"fx_fy_cx_cy"`` (one intrinsic shared by all frames).
    """

    def __init__(self, mask_bg=True, scene_st=None, scene_end=None,
                 debug=False, *args, ROOT, **kwargs):
        """Index the scene directories found under ``ROOT``.

        Args:
            mask_bg: One of ``True``, ``False`` or ``'rand'``; stored only.
            scene_st: Unused; kept for interface compatibility.
            scene_end: Unused; kept for interface compatibility.
            debug: Unused; kept for interface compatibility.
            ROOT: Directory containing one sub-directory per scene.
            *args: Forwarded to ``BaseSfMViewDataset``.
            **kwargs: Forwarded to ``BaseSfMViewDataset``.
        """
        self.ROOT = ROOT
        super().__init__(*args, **kwargs)
        assert mask_bg in (True, False, 'rand')
        self.mask_bg = mask_bg
        self.dataset_label = 'EgoData'

        # Scene names to skip (currently none).
        except_scene = []

        # ``os.listdir`` order is arbitrary; sorting keeps the mapping from
        # dataset index to scene stable across runs and machines.
        scene_dirs = (
            os.path.join(self.ROOT, name) for name in sorted(os.listdir(self.ROOT))
        )
        self.scene_list = [
            path for path in scene_dirs
            if os.path.isdir(path) and os.path.basename(path) not in except_scene
        ]

    def __len__(self):
        """Return the number of indexed scenes."""
        return len(self.scene_list)

    # NOTE(review): the four path helpers below build 'images'/'masks' paths,
    # while ``_get_views`` reads from 'rgbs'/'depths' - they look like leftovers
    # from another dataset layout; confirm before relying on them.
    def _get_metadatapath(self, obj, instance, view_idx):
        """Return the ``.npz`` metadata path for one frame."""
        return osp.join(self.ROOT, obj, instance, 'images', f'frame{view_idx:06n}.npz')

    def _get_impath(self, obj, instance, view_idx):
        """Return the ``.jpg`` image path for one frame."""
        return osp.join(self.ROOT, obj, instance, 'images', f'frame{view_idx:06n}.jpg')

    def _get_depthpath(self, obj, instance, view_idx):
        """Return the geometric depth-map path for one frame."""
        return osp.join(self.ROOT, obj, instance, 'depths', f'frame{view_idx:06n}.jpg.geometric.png')

    def _get_maskpath(self, obj, instance, view_idx):
        """Return the ``.png`` mask path for one frame."""
        return osp.join(self.ROOT, obj, instance, 'masks', f'frame{view_idx:06n}.png')

    def _read_depthmap(self, depthpath, input_metadata=None):
        """Read a depth map unchanged from disk and cast it to ``float32``.

        ``input_metadata`` is accepted but not used.
        """
        depthmap = imread_cv2(depthpath, cv2.IMREAD_UNCHANGED)
        return depthmap.astype(np.float32)

    def _get_views(self, idx, resolution, rng):
        """Sample ``self.num_views`` strided frames from scene ``idx``.

        Args:
            idx: Index into ``self.scene_list``.
            resolution: Indexable; only ``resolution[0]`` is used, both as the
                target size for ``resize_crop_video`` and to normalise the
                focal lengths.
            rng: Accepted for interface compatibility but NOT used; sampling
                draws from the global ``np.random`` state.

        Returns:
            dict with keys ``rgbs``, ``depths``, ``pose_enc``, ``traj_mat``,
            ``intrs``, ``traj_3d``, ``vis``, ``syn_real``, ``metric_rel`` and
            ``data_dir``.
        """
        scene_root = self.scene_list[idx]
        images_pool = sorted(glob.glob(os.path.join(scene_root, "rgbs", "*.jpg")))
        depths_pool = sorted(glob.glob(os.path.join(scene_root, "depths", "*.png")))

        # Read the archive once and close it afterwards (it used to be opened
        # and fully decoded twice).
        # NOTE: allow_pickle=True can execute arbitrary code when the archive
        # is untrusted; only point ROOT at data you control.
        with np.load(os.path.join(scene_root, "meta.npz"), allow_pickle=True) as meta:
            extrs_meta = meta["c2w"]
            fx, fy, cx, cy = meta["fx_fy_cx_cy"]
        intrs_meta = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]).astype(np.float32)

        # Pick a random temporal stride (3, 4 or 5) and a random start frame.
        # NOTE(review): this uses the global np.random state rather than
        # ``rng``, so sampling is not governed by the caller's generator.
        num_frames = len(images_pool)
        stride = int(np.random.uniform(3, 6))
        span = stride * self.num_views
        start = np.random.choice(np.arange(0, max(num_frames - span, 1)))
        # Short clips repeat their last frame instead of running out of range.
        idxs = np.arange(start, start + span, stride).clip(0, num_frames - 1)

        extrs = []
        intrs = []
        rgbs = []
        depths = []
        for idx_i in idxs:
            rgbs.append(imread_cv2(images_pool[idx_i]))
            # presumably millimetres -> metres; confirm against the data
            depth = imread_cv2(depths_pool[idx_i], cv2.IMREAD_UNCHANGED) / 1000.0
            depths.append(median_filter(depth, size=7))
            # The stored "c2w" matrix is inverted before use.
            extrs.append(np.linalg.inv(extrs_meta[idx_i]))
            intrs.append(intrs_meta)

        rgbs = np.stack(rgbs, axis=0)
        depths = np.stack(depths, axis=0)
        intrinsics = np.stack(intrs, axis=0)
        extrinsics = np.stack(extrs, axis=0)

        # crop and resize
        rgbs, depths, Intrs = resize_crop_video(rgbs, depths, intrinsics, resolution[0])

        # NOTE(review): the original comment labels these poses C2W although
        # they are the inverse of the "c2w" entry in meta.npz - confirm which
        # convention the stored matrices actually follow.
        camera_poses = torch.from_numpy(extrinsics)

        # Focal lengths normalised by the target size, averaged over x and y.
        focal0 = Intrs[:, 0, 0] / resolution[0]
        focal1 = Intrs[:, 1, 1] / resolution[0]
        focal = (focal0.unsqueeze(1) + focal1.unsqueeze(1)) / 2

        # Express every pose relative to the first frame.
        camera_poses = torch.inverse(camera_poses[:1]) @ camera_poses

        # Scale normalisation is disabled: the radius is fixed to 1.
        Radius = 1
        camera_poses[:, :3, 3] = camera_poses[:, :3, 3] / Radius
        R = camera_poses[:, :3, :3]
        t = camera_poses[:, :3, 3]
        rot_vec = matrix_to_quaternion(R)
        pose_enc = torch.cat([t, rot_vec, focal], dim=1)

        depth_cano = depths / Radius
        # ``x == nan`` is always False, so NaNs must be found with ``isnan``.
        if torch.is_tensor(depth_cano):
            nan_mask = torch.isnan(depth_cano)
        else:
            nan_mask = np.isnan(depth_cano)
        depth_cano[nan_mask] = 0

        # No tracks are provided here: trajectories and visibility are zeros.
        traj_3d = torch.zeros(self.num_views, self.track_num, 3)
        vis = torch.zeros(self.num_views, self.track_num)
        syn_real = torch.tensor([1])
        metric_rel = torch.tensor([1])

        return dict(
            rgbs=rgbs,
            depths=depth_cano,
            pose_enc=pose_enc,
            traj_mat=camera_poses,
            intrs=Intrs,
            traj_3d=traj_3d,
            vis=vis,
            syn_real=syn_real,
            metric_rel=metric_rel,
            data_dir=scene_root,
        )
if __name__ == "__main__":
    # Smoke test: load one sample from the first scene and visualise it.
    DATA_DIR = "/mnt/bn/xyxdata/data/4d_data/adt_processed"

    # Optional CLI arguments. Each index is guarded by its own length check;
    # the second one used to test ``> 1`` and raised IndexError when only one
    # argument was supplied.
    cpu_num_total = int(sys.argv[1]) if len(sys.argv) > 1 else 8
    cpu_num_per = int(sys.argv[2]) if len(sys.argv) > 2 else 8

    dataset = EgoData(split='train', ROOT=DATA_DIR, resolution=518, aug_crop=16, num_views=16)
    rng = np.random.default_rng(seed=0)
    data_ret = dataset._get_views(0, (518, 518), rng)

    from models.SpaTrackV2.datasets.vis3d_check import vis4d
    vis4d(data_ret["rgbs"], data_ret["depths"],
          data_ret["traj_mat"], data_ret["intrs"], track3d=data_ret["traj_3d"],
          workspace="/mnt/bn/xyxdata/home/codes/my_projs/SpaTrack2/vis_results/test")