refactor: improved projection APIs (#53)
yxlao committed Apr 11, 2024
1 parent 8fe8dd1 commit 9ab2b92
Showing 4 changed files with 202 additions and 158 deletions.
53 changes: 45 additions & 8 deletions camtools/convert.py
@@ -3,6 +3,7 @@
import torch

from . import sanity
from . import convert


def pad_0001(array):
@@ -52,7 +53,7 @@ def pad_0001(array):

def rm_pad_0001(array, check_vals=False):
"""
Remove the homogeneous bottom row [0, 0, 0, 1].
Remove the bottom row of [0, 0, 0, 1].
Args:
array: (4, 4) or (N, 4, 4).
@@ -117,6 +118,45 @@ def rm_pad_0001(array, check_vals=False):
return array[..., :3, :]
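A minimal round-trip sketch for pad_0001 and rm_pad_0001 (not part of the diff; it assumes camtools is importable as ct and exposes the convert module):

import numpy as np
import camtools as ct

T_3x4 = np.hstack([np.eye(3), np.zeros((3, 1))])  # (3, 4) [R | t]
T_4x4 = ct.convert.pad_0001(T_3x4)  # (4, 4), bottom row [0, 0, 0, 1]
np.testing.assert_allclose(ct.convert.rm_pad_0001(T_4x4, check_vals=True), T_3x4)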


def to_homo(array):
"""
Convert a 2D array to homogeneous coordinates by appending a column of ones.
Args:
array: A 2D numpy array of shape (N, M).
Returns:
A numpy array of shape (N, M+1) with a column of ones appended.
"""
if not isinstance(array, np.ndarray):
raise ValueError(f"Input must be a 2D numpy array, but got {type(array)}.")
if array.ndim != 2:
raise ValueError(f"Input must be a 2D numpy array, but got shape {array.shape}.")

ones = np.ones((array.shape[0], 1), dtype=array.dtype)
return np.hstack((array, ones))
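A quick sketch of to_homo (again assuming camtools is importable as ct):

import numpy as np
import camtools as ct

points = np.random.rand(5, 3)
points_homo = ct.convert.to_homo(points)  # (5, 4), last column is all ones
assert points_homo.shape == (5, 4)
assert np.all(points_homo[:, -1] == 1.0)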


def from_homo(array):
"""
Convert an array from homogeneous to Cartesian coordinates by dividing by the
last column and removing it.
Args:
array: A 2D numpy array of shape (N, M) in homogeneous coordinates.
Returns:
A numpy array of shape (N, M-1) in Cartesian coordinates.
"""
if not isinstance(array, np.ndarray):
raise ValueError(f"Input must be a 2D numpy array, but got {type(array)}.")
if array.ndim != 2:
raise ValueError(f"Input must be a 2D numpy array, but got shape {array.shape}.")
if array.shape[1] < 2:
raise ValueError(
f"Input array must have at least two columns to remove the "
f"homogeneous coordinate, but got shape {array.shape}."
)

return array[:, :-1] / array[:, -1, np.newaxis]
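from_homo divides by the last column before dropping it, so composing it with to_homo is the identity for ordinary points; a hedged sketch:

import numpy as np
import camtools as ct

points_homo = np.array([[2.0, 4.0, 2.0]])  # w = 2
print(ct.convert.from_homo(points_homo))  # [[1. 2.]]

points = np.random.rand(4, 3)
np.testing.assert_allclose(ct.convert.from_homo(ct.convert.to_homo(points)), points)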


def R_to_quat(R):
# https://github.com/isl-org/StableViewSynthesis/tree/main/co
R = R.reshape(-1, 3, 3)
@@ -385,17 +425,14 @@ def K_T_to_W2P(K, T):

def P_to_W2P(P):
sanity.assert_shape_3x4(P, name="P")
if torch.is_tensor(P):
bottom_row = torch.tensor([0, 0, 0, 1], device=P.device, dtype=P.dtype)
W2P = torch.vstack((P, bottom_row))
else:
bottom_row = np.array([[0, 0, 0, 1]])
W2P = np.vstack((P, bottom_row))
W2P = convert.pad_0001(P)
return W2P


def W2P_to_P(W2P):
P = W2P[:3, :4]
if W2P.shape != (4, 4):
raise ValueError(f"Expected W2P of shape (4, 4), but got {W2P.shape}.")
P = convert.rm_pad_0001(W2P, check_vals=True)
return P
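After this refactor, P_to_W2P is simply pad_0001 and W2P_to_P is its checked inverse; a small sketch (the K and T values are illustrative assumptions):

import numpy as np
import camtools as ct

K = np.array([[500.0, 0.0, 320.0],
              [0.0, 500.0, 240.0],
              [0.0, 0.0, 1.0]])
T = np.eye(4)
P = K @ T[:3, :4]  # (3, 4) projection matrix
W2P = ct.convert.P_to_W2P(P)  # (4, 4), padded with [0, 0, 0, 1]
np.testing.assert_allclose(ct.convert.W2P_to_P(W2P), P)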


236 changes: 99 additions & 137 deletions camtools/project.py
@@ -3,17 +3,16 @@
"""

import numpy as np
import torch
from . import sanity
from . import convert


def points_to_pixel(points, K, T):
def point_cloud_to_pixel(points, K, T):
"""
Project points in world coordinates to pixel coordinates.
Example usage:
pixels = ct.project.points_to_pixel(points, K, T)
pixels = ct.project.point_cloud_to_pixel(points, K, T)
cols = pixels[:, 0] # cols, width, x, top-left to top-right
rows = pixels[:, 1] # rows, height, y, top-left to bottom-left
@@ -40,156 +39,119 @@ def points_to_pixel(points, K, T):

W2P = convert.K_T_to_W2P(K, T)

# points_homo: (N, 4)
N = len(points)
if torch.is_tensor(points):
ones = torch.ones((N, 1), dtype=points.dtype, device=points.device)
points_homo = torch.hstack((points, ones))
else:
ones = np.ones((N, 1))
points_homo = np.hstack((points, ones))

# points_out: (N, 4)
# points_out = (W2P @ points_homo.T).T
# = points_homo @ W2P.T
points_out = points_homo @ W2P.T

# points_out: (N, 3)
# Discard the last column.
points_out = points_out[:, :3]

# points_out: (N, 2)
# Convert homogeneous to Cartesian coordinates.
points_out = points_out[:, :2] / points_out[:, 2:]

return points_out


def im_depth_to_points(im_depth, K, T):
"""
Convert a depth image to a point cloud. Assumes valid depths > 0 and < inf.
Invalid depths are ignored. The depth image should already be in world
scale; that is, each pixel value represents the distance between the camera
center and the point in meters.
Args:
im_depth: depth image (H, W), float32, already in world scale.
K: intrinsics (3, 3)
T: extrinsics (4, 4)
Returns:
points: (N, 3) points in world coordinates.
"""
sanity.assert_K(K)
sanity.assert_T(T)

height, width = im_depth.shape
im_valid_mask = (im_depth.flatten() > 0) & (im_depth.flatten() < np.inf)
pose = np.linalg.inv(T)

# pixels.shape == (height, width, 2)
# pixels[r, c] == [c, r] # Since x-axis goes from top-left to top-right.
pixels = np.transpose(np.indices((width, height)), (2, 1, 0))
# (height * width, 2)
pixels = pixels.reshape((-1, 2))
# (num_points, 2)
pixels = pixels[im_valid_mask]
# (num_points, 3)
pixels = np.hstack((pixels, np.ones((pixels.shape[0], 1))))
# (num_points, )
depths = im_depth.flatten()[im_valid_mask]
# (num_points, 3)
points = depths.reshape((-1, 1)) * (np.linalg.inv(K) @ pixels.T).T
# (num_points, 4)
points = np.hstack((points, np.ones((points.shape[0], 1))))
# (num_points, 4)
points = (pose @ points.T).T
# (num_points, 3)
points = points[:, :3]

return points


def im_depth_to_im_points(im_depth, K, T):
# (N, 3) -> (N, 4)
points = convert.to_homo(points)
# (N, 4)
pixels = (W2P @ points.T).T
# (N, 4) -> (N, 3), discard the last column
pixels = pixels[:, :3]
# (N, 3) -> (N, 2)
pixels = convert.from_homo(pixels)

return pixels
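A usage sketch of the renamed point_cloud_to_pixel (K, T, and the point are illustrative assumptions):

import numpy as np
import camtools as ct

K = np.array([[500.0, 0.0, 320.0],
              [0.0, 500.0, 240.0],
              [0.0, 0.0, 1.0]])
T = np.eye(4)  # camera at the world origin
points = np.array([[0.0, 0.0, 5.0]])  # one point 5 m in front of the camera

pixels = ct.project.point_cloud_to_pixel(points, K, T)
cols, rows = pixels[:, 0], pixels[:, 1]
# With identity extrinsics, the point projects to the principal point (320, 240).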


def depth_to_point_cloud(
im_depth: np.ndarray,
K: np.ndarray,
T: np.ndarray,
im_color: np.ndarray = None,
return_as_image: bool = False,
ignore_invalid: bool = True,
):
"""
Convert depth image to point cloud. Each pixel is converted to exactly
one point. Invalid depths are still returned; the returned shape is
(H, W, 3), which differs from im_depth_to_points.
Convert a depth image to a point cloud, optionally including color information.
Can return either a sparse (N, 3) point cloud or a dense one with the image
shape (H, W, 3).
Args:
im_depth: depth image (H, W), float32, already in world scale.
K: intrinsics (3, 3)
T: extrinsics (4, 4)
im_depth: Depth image (H, W), float32, in world scale.
K: Intrinsics matrix (3, 3).
T: Extrinsics matrix (4, 4).
im_color: Color image (H, W, 3), float32/float64, range [0, 1].
return_as_image: If True, returns a dense point cloud with the same
spatial shape as the input depth image (H, W, 3); ignore_invalid is then
ignored, as invalid depths are not removed. If False, returns a sparse
point cloud of shape (N, 3), respecting the ignore_invalid flag.
ignore_invalid: If True, ignores invalid depths (<= 0 or >= inf).
Returns:
points: (H, W, 3) points in world coordinates.
- im_color is None, return_as_image is False:
- return: points (N, 3)
- im_color is None, return_as_image is True:
- return: im_points (H, W, 3)
- im_color is not None, return_as_image is False:
- return: (points (N, 3), colors (N, 3))
- im_color is not None, return_as_image is True:
- return: (im_points (H, W, 3), im_colors (H, W, 3))
"""
# Sanity checks
sanity.assert_K(K)
sanity.assert_T(T)
if not isinstance(im_depth, np.ndarray):
raise TypeError("im_depth must be a numpy array")
if im_depth.dtype != np.float32:
raise TypeError("im_depth must be of type float32")
if im_depth.ndim != 2:
raise ValueError("im_depth must be a 2D array")
if im_color is not None:
if not isinstance(im_color, np.ndarray):
raise TypeError("im_color must be a numpy array")
if im_color.shape[:2] != im_depth.shape or im_color.ndim != 3:
raise ValueError(
f"im_color must be (H, W, 3), and have the same "
f"shape as im_depth, but got {im_color.shape}."
)
if im_color.dtype not in [np.float32, np.float64]:
raise TypeError("im_color must be of type float32 or float64")
if im_color.max() > 1.0 or im_color.min() < 0.0:
raise ValueError("im_color values must be in the range [0, 1]")
if return_as_image and ignore_invalid:
print("Warning: ignore_invalid is ignored when return_as_image is True.")
ignore_invalid = False

height, width = im_depth.shape
pose = np.linalg.inv(T)
pose = convert.T_to_pose(T)

# pixels.shape == (height, width, 2)
# pixels[r, c] == [c, r] # Since x-axis goes from top-left to top-right.
# pixels[r, c] == [c, r], since x-axis goes from top-left to top-right.
pixels = np.transpose(np.indices((width, height)), (2, 1, 0))
# (height * width, 2)
pixels = pixels.reshape((-1, 2))
# (height * width, 3)
pixels = np.hstack((pixels, np.ones((pixels.shape[0], 1))))
pixels_homo = convert.to_homo(pixels)
# (height * width, )
depths = im_depth.flatten()
# (height * width, 3)
points = depths.reshape((-1, 1)) * (np.linalg.inv(K) @ pixels.T).T
# (height * width, 4)
points = np.hstack((points, np.ones((points.shape[0], 1))))
# (height * width, 4)
points = (pose @ points.T).T
# (height * width, 3)
points = points[:, :3]
# (height, width, 3)
points = points.reshape((height, width, 3))

return points


def im_depth_im_color_to_points_colors(im_depth, im_color, K, T):
"""
Convert a depth image and a color image to a colored point cloud. Assumes
valid depths > 0 and < inf. Invalid depths are ignored. The depth image
should already be in world scale; that is, each pixel value represents the
distance between the camera center and the point in meters.
Args:
im_depth: depth image (H, W), float32, already in world scale.
im_color: color image (H, W, 3), float32/float64, in [0, 1].
K: intrinsics (3, 3)
T: extrinsics (4, 4)
Returns:
points: (N, 3) points in world coordinates.
colors: (N, 3) colors in [0, 1], float32/float64.
"""
sanity.assert_K(K)
sanity.assert_T(T)
sanity.assert_shape(im_color, (None, None, 3), name="im_color")
sanity.assert_shape(im_depth, (None, None), name="im_depth")
assert len(im_color) == len(im_depth)
assert im_color.shape[0] == im_depth.shape[0]
assert im_color.shape[1] == im_depth.shape[1]
assert im_color.dtype == np.float32 or im_color.dtype == np.float64
assert im_depth.dtype == np.float32 or im_depth.dtype == np.float64
assert im_color.max() <= 1.0
assert im_color.min() >= 0.0

im_valid_mask = (im_depth.flatten() > 0) & (im_depth.flatten() < np.inf)
num_valid = np.sum(im_valid_mask)

points = im_depth_to_points(im_depth, K, T)
if len(points) != num_valid:
raise ValueError(
f"# of points ({len(points)}) does not match num_valid ({num_valid})"
)
colors = im_color.reshape((-1, 3))[im_valid_mask]

return points, colors
if ignore_invalid:
valid_mask = (depths > 0) & (depths < np.inf)
depths = depths[valid_mask]
pixels_homo = pixels_homo[valid_mask]
if im_color is not None:
colors = im_color.reshape((-1, 3))[valid_mask]

# Transform pixel coordinates to world coordinates.
# (N, 1), after optional masking
depths = depths.reshape((-1, 1))

# (N, 3)
points_camera = depths * (np.linalg.inv(K) @ pixels_homo.T).T
# (N, 4)
points_world = (pose @ (convert.to_homo(points_camera).T)).T
# (N, 3)
points_world = convert.from_homo(points_world)

if return_as_image:
assert not ignore_invalid, "ignore_invalid is disabled when return_as_image is True."
points_world = points_world.reshape((height, width, 3))
if im_color is None:
return points_world
else:
return points_world, im_color
else:
if im_color is None:
return points_world
else:
return points_world, colors
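A sketch of the consolidated depth_to_point_cloud in both sparse and dense modes (K and the depth map are assumptions; ignore_invalid is disabled explicitly in the dense call so the warning does not fire):

import numpy as np
import camtools as ct

K = np.array([[500.0, 0.0, 320.0],
              [0.0, 500.0, 240.0],
              [0.0, 0.0, 1.0]])
T = np.eye(4)
im_depth = np.full((480, 640), 2.0, dtype=np.float32)  # flat scene 2 m away

points = ct.project.depth_to_point_cloud(im_depth, K, T)  # sparse, (N, 3)
im_points = ct.project.depth_to_point_cloud(
    im_depth, K, T, return_as_image=True, ignore_invalid=False
)  # dense, (480, 640, 3)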
20 changes: 7 additions & 13 deletions camtools/transform.py
@@ -3,8 +3,9 @@
"""

import numpy as np
import torch

from . import sanity
from . import convert


def transform_points(points, transform_mat):
@@ -22,15 +23,8 @@ def transform_points(points, transform_mat):
sanity.assert_shape_4x4(transform_mat, name="mat")
sanity.assert_same_device(points, transform_mat)

N = len(points)
if torch.is_tensor(transform_mat):
ones = torch.ones((N, 1), dtype=points.dtype, device=points.device)
points_homo = torch.hstack((points, ones))
else:
ones = np.ones((N, 1))
points_homo = np.hstack((points, ones))

# (mat @ points_homo.T).T
points_out = points_homo @ transform_mat.T
points_out = points_out[:, :3] / points_out[:, 3:]
return points_out
points = convert.to_homo(points)
points_transformed = points @ transform_mat.T # (mat @ points.T).T
points_transformed = convert.from_homo(points_transformed)

return points_transformed
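transform_points now routes through to_homo/from_homo; a minimal sketch with a pure translation (the values and the ct.transform module path are assumptions):

import numpy as np
import camtools as ct

transform = np.eye(4)
transform[:3, 3] = [1.0, 2.0, 3.0]  # pure translation
points = np.zeros((2, 3))
points_out = ct.transform.transform_points(points, transform)
# Each point is shifted to (1.0, 2.0, 3.0).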
