refactor: improved projection APIs (#53)
yxlao committed Apr 11, 2024
1 parent 8fe8dd1 commit 9ab2b92
Showing 4 changed files with 202 additions and 158 deletions.
53 changes: 45 additions & 8 deletions camtools/convert.py
@@ -3,6 +3,7 @@
import torch

from . import sanity
from . import convert


def pad_0001(array):
@@ -52,7 +53,7 @@ def pad_0001(array):

def rm_pad_0001(array, check_vals=False):
"""
Remove the homogeneous bottom row [0, 0, 0, 1].
Remove the bottom row of [0, 0, 0, 1].
Args:
array: (4, 4) or (N, 4, 4).
@@ -117,6 +118,45 @@ def rm_pad_0001(array, check_vals=False):
return array[..., :3, :]
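A minimal round-trip sketch for pad_0001 and rm_pad_0001 (not part of the diff; it assumes camtools is importable as ct and exposes the convert module):

import numpy as np
import camtools as ct

T_3x4 = np.hstack([np.eye(3), np.zeros((3, 1))])  # (3, 4) [R | t]
T_4x4 = ct.convert.pad_0001(T_3x4)  # (4, 4), bottom row [0, 0, 0, 1]
np.testing.assert_allclose(ct.convert.rm_pad_0001(T_4x4, check_vals=True), T_3x4)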


def to_homo(array):
"""
Convert a 2D array to homogeneous coordinates by appending a column of ones.
Args:
array: A 2D numpy array of shape (N, M).
Returns:
A numpy array of shape (N, M+1) with a column of ones appended.
"""
if not isinstance(array, np.ndarray):
raise ValueError(f"Input must be a 2D numpy array, but got {type(array)}.")
if array.ndim != 2:
raise ValueError(f"Input must be a 2D numpy array, but got shape {array.shape}.")

ones = np.ones((array.shape[0], 1), dtype=array.dtype)
return np.hstack((array, ones))
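A quick sketch of to_homo (again assuming camtools is importable as ct):

import numpy as np
import camtools as ct

points = np.random.rand(5, 3)
points_homo = ct.convert.to_homo(points)  # (5, 4), last column is all ones
assert points_homo.shape == (5, 4)
assert np.all(points_homo[:, -1] == 1.0)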


def from_homo(array):
"""
Convert an array from homogeneous to Cartesian coordinates by dividing by the
last column and removing it.
Args:
array: A 2D numpy array of shape (N, M) in homogeneous coordinates.
Returns:
A numpy array of shape (N, M-1) in Cartesian coordinates.
"""
if not isinstance(array, np.ndarray):
raise ValueError(f"Input must be a 2D numpy array, but got {type(array)}.")
if array.ndim != 2:
raise ValueError(f"Input must be a 2D numpy array, but got shape {array.shape}.")
if array.shape[1] < 2:
raise ValueError(
f"Input array must have at least two columns to remove the "
f"homogeneous coordinate, but got shape {array.shape}."
)

return array[:, :-1] / array[:, -1, np.newaxis]
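from_homo divides by the last column before dropping it, so composing it with to_homo is the identity for ordinary points; a hedged sketch:

import numpy as np
import camtools as ct

points_homo = np.array([[2.0, 4.0, 2.0]])  # w = 2
print(ct.convert.from_homo(points_homo))  # [[1. 2.]]

points = np.random.rand(4, 3)
np.testing.assert_allclose(ct.convert.from_homo(ct.convert.to_homo(points)), points)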


def R_to_quat(R):
# https://github.com/isl-org/StableViewSynthesis/tree/main/co
R = R.reshape(-1, 3, 3)
@@ -385,17 +425,14 @@ def K_T_to_W2P(K, T):

def P_to_W2P(P):
sanity.assert_shape_3x4(P, name="P")
if torch.is_tensor(P):
bottom_row = torch.tensor([0, 0, 0, 1], device=P.device, dtype=P.dtype)
W2P = torch.vstack((P, bottom_row))
else:
bottom_row = np.array([[0, 0, 0, 1]])
W2P = np.vstack((P, bottom_row))
W2P = convert.pad_0001(P)
return W2P


def W2P_to_P(W2P):
P = W2P[:3, :4]
if W2P.shape != (4, 4):
raise ValueError(f"Expected W2P of shape (4, 4), but got {W2P.shape}.")
P = convert.rm_pad_0001(W2P, check_vals=True)
return P
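After this refactor, P_to_W2P is simply pad_0001 and W2P_to_P is its checked inverse; a small sketch (the K and T values are illustrative assumptions):

import numpy as np
import camtools as ct

K = np.array([[500.0, 0.0, 320.0],
              [0.0, 500.0, 240.0],
              [0.0, 0.0, 1.0]])
T = np.eye(4)
P = K @ T[:3, :4]  # (3, 4) projection matrix
W2P = ct.convert.P_to_W2P(P)  # (4, 4), padded with [0, 0, 0, 1]
np.testing.assert_allclose(ct.convert.W2P_to_P(W2P), P)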


236 changes: 99 additions & 137 deletions camtools/project.py
@@ -3,17 +3,16 @@
"""

import numpy as np
import torch
from . import sanity
from . import convert


def points_to_pixel(points, K, T):
def point_cloud_to_pixel(points, K, T):
"""
Project points in world coordinates to pixel coordinates.
Example usage:
pixels = ct.project.points_to_pixel(points, K, T)
pixels = ct.project.point_cloud_to_pixel(points, K, T)
cols = pixels[:, 0] # cols, width, x, top-left to top-right
rows = pixels[:, 1] # rows, height, y, top-left to bottom-left
@@ -40,156 +39,119 @@ def points_to_pixel(points, K, T):

W2P = convert.K_T_to_W2P(K, T)

# points_homo: (N, 4)
N = len(points)
if torch.is_tensor(points):
ones = torch.ones((N, 1), dtype=points.dtype, device=points.device)
points_homo = torch.hstack((points, ones))
else:
ones = np.ones((N, 1))
points_homo = np.hstack((points, ones))

# points_out: (N, 4)
# points_out = (W2P @ points_homo.T).T
# = points_homo @ W2P.T
points_out = points_homo @ W2P.T

# points_out: (N, 3)
# Discard the last column.
points_out = points_out[:, :3]

# points_out: (N, 2)
# Convert homogeneous to Cartesian coordinates.
points_out = points_out[:, :2] / points_out[:, 2:]

return points_out


def im_depth_to_points(im_depth, K, T):
"""
Convert a depth image to a point cloud. Assumes valid depths > 0 and < inf.
Invalid depths are ignored. The depth image should already be in world
scale; that is, each pixel value represents the distance between the camera
center and the point in meters.
Args:
im_depth: depth image (H, W), float32, already in world scale.
K: intrinsics (3, 3)
T: extrinsics (4, 4)
Returns:
points: (N, 3) points in world coordinates.
"""
sanity.assert_K(K)
sanity.assert_T(T)

height, width = im_depth.shape
im_valid_mask = (im_depth.flatten() > 0) & (im_depth.flatten() < np.inf)
pose = np.linalg.inv(T)

# pixels.shape == (height, width, 2)
# pixels[r, c] == [c, r] # Since x-axis goes from top-left to top-right.
pixels = np.transpose(np.indices((width, height)), (2, 1, 0))
# (height * width, 2)
pixels = pixels.reshape((-1, 2))
# (num_points, 2)
pixels = pixels[im_valid_mask]
# (num_points, 3)
pixels = np.hstack((pixels, np.ones((pixels.shape[0], 1))))
# (num_points, )
depths = im_depth.flatten()[im_valid_mask]
# (num_points, 3)
points = depths.reshape((-1, 1)) * (np.linalg.inv(K) @ pixels.T).T
# (num_points, 4)
points = np.hstack((points, np.ones((points.shape[0], 1))))
# (num_points, 4)
points = (pose @ points.T).T
# (num_points, 3)
points = points[:, :3]

return points


def im_depth_to_im_points(im_depth, K, T):
# (N, 3) -> (N, 4)
points = convert.to_homo(points)
# (N, 4)
pixels = (W2P @ points.T).T
# (N, 4) -> (N, 3), discard the last column
pixels = pixels[:, :3]
# (N, 3) -> (N, 2)
pixels = convert.from_homo(pixels)

return pixels
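A usage sketch of the renamed point_cloud_to_pixel (K, T, and the point are illustrative assumptions):

import numpy as np
import camtools as ct

K = np.array([[500.0, 0.0, 320.0],
              [0.0, 500.0, 240.0],
              [0.0, 0.0, 1.0]])
T = np.eye(4)  # camera at the world origin
points = np.array([[0.0, 0.0, 5.0]])  # one point 5 m in front of the camera

pixels = ct.project.point_cloud_to_pixel(points, K, T)
cols, rows = pixels[:, 0], pixels[:, 1]
# With identity extrinsics, the point projects to the principal point (320, 240).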


def depth_to_point_cloud(
im_depth: np.ndarray,
K: np.ndarray,
T: np.ndarray,
im_color: np.ndarray = None,
return_as_image: bool = False,
ignore_invalid: bool = True,
):
"""
Convert depth image to point cloud. Each pixel is converted to exactly
one point. Invalid depths are still returned; the returned shape is
(H, W, 3), which differs from im_depth_to_points.
Convert a depth image to a point cloud, optionally including color information.
Can return either a sparse (N, 3) point cloud or a dense one with the image
shape (H, W, 3).
Args:
im_depth: depth image (H, W), float32, already in world scale.
K: intrinsics (3, 3)
T: extrinsics (4, 4)
im_depth: Depth image (H, W), float32, in world scale.
K: Intrinsics matrix (3, 3).
T: Extrinsics matrix (4, 4).
im_color: Color image (H, W, 3), float32/float64, range [0, 1].
return_as_image: If True, returns a dense point cloud with the same
spatial shape as the input depth image (H, W, 3); ignore_invalid is then
ignored, as invalid depths are not removed. If False, returns a sparse
point cloud of shape (N, 3), respecting the ignore_invalid flag.
ignore_invalid: If True, ignores invalid depths (<= 0 or >= inf).
Returns:
points: (H, W, 3) points in world coordinates.
- im_color is None, return_as_image is False:
- return: points (N, 3)
- im_color is None, return_as_image is True:
- return: im_points (H, W, 3)
- im_color is not None, return_as_image is False:
- return: (points (N, 3), colors (N, 3))
- im_color is not None, return_as_image is True:
- return: (im_points (H, W, 3), im_colors (H, W, 3))
"""
# Sanity checks
sanity.assert_K(K)
sanity.assert_T(T)
if not isinstance(im_depth, np.ndarray):
raise TypeError("im_depth must be a numpy array")
if im_depth.dtype != np.float32:
raise TypeError("im_depth must be of type float32")
if im_depth.ndim != 2:
raise ValueError("im_depth must be a 2D array")
if im_color is not None:
if not isinstance(im_color, np.ndarray):
raise TypeError("im_color must be a numpy array")
if im_color.shape[:2] != im_depth.shape or im_color.ndim != 3:
raise ValueError(
f"im_color must be (H, W, 3), and have the same "
f"shape as im_depth, but got {im_color.shape}."
)
if im_color.dtype not in [np.float32, np.float64]:
raise TypeError("im_color must be of type float32 or float64")
if im_color.max() > 1.0 or im_color.min() < 0.0:
raise ValueError("im_color values must be in the range [0, 1]")
if return_as_image and ignore_invalid:
print("Warning: ignore_invalid is ignored when return_as_image is True.")
ignore_invalid = False

height, width = im_depth.shape
pose = np.linalg.inv(T)
pose = convert.T_to_pose(T)

# pixels.shape == (height, width, 2)
# pixels[r, c] == [c, r] # Since x-axis goes from top-left to top-right.
# pixels[r, c] == [c, r], since x-axis goes from top-left to top-right.
pixels = np.transpose(np.indices((width, height)), (2, 1, 0))
# (height * width, 2)
pixels = pixels.reshape((-1, 2))
# (height * width, 3)
pixels = np.hstack((pixels, np.ones((pixels.shape[0], 1))))
pixels_homo = convert.to_homo(pixels)
# (height * width, )
depths = im_depth.flatten()
# (height * width, 3)
points = depths.reshape((-1, 1)) * (np.linalg.inv(K) @ pixels.T).T
# (height * width, 4)
points = np.hstack((points, np.ones((points.shape[0], 1))))
# (height * width, 4)
points = (pose @ points.T).T
# (height * width, 3)
points = points[:, :3]
# (height, width, 3)
points = points.reshape((height, width, 3))

return points


def im_depth_im_color_to_points_colors(im_depth, im_color, K, T):
"""
Convert a depth image and a color image to a colored point cloud. Assumes
valid depths > 0 and < inf. Invalid depths are ignored. The depth image
should already be in world scale; that is, each pixel value represents the
distance between the camera center and the point in meters.
Args:
im_depth: depth image (H, W), float32, already in world scale.
im_color: color image (H, W, 3), float32/float64, in [0, 1].
K: intrinsics (3, 3)
T: extrinsics (4, 4)
Returns:
points: (N, 3) points in world coordinates.
colors: (N, 3) colors in [0, 1], float32/float64.
"""
sanity.assert_K(K)
sanity.assert_T(T)
sanity.assert_shape(im_color, (None, None, 3), name="im_color")
sanity.assert_shape(im_depth, (None, None), name="im_depth")
assert len(im_color) == len(im_depth)
assert im_color.shape[0] == im_depth.shape[0]
assert im_color.shape[1] == im_depth.shape[1]
assert im_color.dtype == np.float32 or im_color.dtype == np.float64
assert im_depth.dtype == np.float32 or im_depth.dtype == np.float64
assert im_color.max() <= 1.0
assert im_color.min() >= 0.0

im_valid_mask = (im_depth.flatten() > 0) & (im_depth.flatten() < np.inf)
num_valid = np.sum(im_valid_mask)

points = im_depth_to_points(im_depth, K, T)
if len(points) != num_valid:
raise ValueError(
f"# of points ({len(points)}) does not match num_valid ({num_valid})"
)
colors = im_color.reshape((-1, 3))[im_valid_mask]

return points, colors
if ignore_invalid:
valid_mask = (depths > 0) & (depths < np.inf)
depths = depths[valid_mask]
pixels_homo = pixels_homo[valid_mask]
if im_color is not None:
colors = im_color.reshape((-1, 3))[valid_mask]

# Transform pixel coordinates to world coordinates.
# (N, 1), after optional masking
depths = depths.reshape((-1, 1))

# (N, 3)
points_camera = depths * (np.linalg.inv(K) @ pixels_homo.T).T
# (N, 4)
points_world = (pose @ (convert.to_homo(points_camera).T)).T
# (N, 3)
points_world = convert.from_homo(points_world)

if return_as_image:
assert not ignore_invalid, "ignore_invalid is disabled when return_as_image is True."
points_world = points_world.reshape((height, width, 3))
if im_color is None:
return points_world
else:
return points_world, im_color
else:
if im_color is None:
return points_world
else:
return points_world, colors
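A sketch of the consolidated depth_to_point_cloud in both sparse and dense modes (K and the depth map are assumptions; ignore_invalid is disabled explicitly in the dense call so the warning does not fire):

import numpy as np
import camtools as ct

K = np.array([[500.0, 0.0, 320.0],
              [0.0, 500.0, 240.0],
              [0.0, 0.0, 1.0]])
T = np.eye(4)
im_depth = np.full((480, 640), 2.0, dtype=np.float32)  # flat scene 2 m away

points = ct.project.depth_to_point_cloud(im_depth, K, T)  # sparse, (N, 3)
im_points = ct.project.depth_to_point_cloud(
    im_depth, K, T, return_as_image=True, ignore_invalid=False
)  # dense, (480, 640, 3)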
20 changes: 7 additions & 13 deletions camtools/transform.py
@@ -3,8 +3,9 @@
"""

import numpy as np
import torch

from . import sanity
from . import convert


def transform_points(points, transform_mat):
@@ -22,15 +23,8 @@ def transform_points(points, transform_mat):
sanity.assert_shape_4x4(transform_mat, name="mat")
sanity.assert_same_device(points, transform_mat)

N = len(points)
if torch.is_tensor(transform_mat):
ones = torch.ones((N, 1), dtype=points.dtype, device=points.device)
points_homo = torch.hstack((points, ones))
else:
ones = np.ones((N, 1))
points_homo = np.hstack((points, ones))

# (mat @ points_homo.T).T
points_out = points_homo @ transform_mat.T
points_out = points_out[:, :3] / points_out[:, 3:]
return points_out
points = convert.to_homo(points)
points_transformed = points @ transform_mat.T # (mat @ points.T).T
points_transformed = convert.from_homo(points_transformed)

return points_transformed
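transform_points now routes through to_homo/from_homo; a minimal sketch with a pure translation (the values and the ct.transform module path are assumptions):

import numpy as np
import camtools as ct

transform = np.eye(4)
transform[:3, 3] = [1.0, 2.0, 3.0]  # pure translation
points = np.zeros((2, 3))
points_out = ct.transform.transform_points(points, transform)
# Each point is shifted to (1.0, 2.0, 3.0).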
