# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

import cv2
import numpy as np
from subprocess import call
import torch
import torch.nn.functional as F
from cog import BasePredictor, Input, Path


class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        # download the weights to "checkpoints"
        from basicsr.archs.ddcolor_arch import DDColor

        class ImageColorizationPipeline(object):
            def __init__(self, model_path, input_size=256, model_size="large"):
                self.input_size = input_size
                if torch.cuda.is_available():
                    self.device = torch.device("cuda")
                else:
                    self.device = torch.device("cpu")

                if model_size == "tiny":
                    self.encoder_name = "convnext-t"
                else:
                    self.encoder_name = "convnext-l"

                self.decoder_type = "MultiScaleColorDecoder"

                self.model = DDColor(
                    encoder_name=self.encoder_name,
                    decoder_name="MultiScaleColorDecoder",
                    input_size=[self.input_size, self.input_size],
                    num_output_channels=2,
                    last_norm="Spectral",
                    do_normalize=False,
                    num_queries=100,
                    num_scales=3,
                    dec_layers=9,
                ).to(self.device)

                self.model.load_state_dict(
                    torch.load(model_path, map_location=torch.device("cpu"))["params"],
                    strict=False,
                )
                self.model.eval()

            @torch.no_grad()
            def process(self, img):
                self.height, self.width = img.shape[:2]

                img = (img / 255.0).astype(np.float32)
                orig_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1]  # (h, w, 1)

                # resize rgb image -> lab -> get grey -> rgb
                img = cv2.resize(img, (self.input_size, self.input_size))
                img_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1]
                img_gray_lab = np.concatenate(
                    (img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1
                )
                img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB)

                tensor_gray_rgb = (
                    torch.from_numpy(img_gray_rgb.transpose((2, 0, 1)))
                    .float()
                    .unsqueeze(0)
                    .to(self.device)
                )
                output_ab = self.model(
                    tensor_gray_rgb
                ).cpu()  # predicted ab channels, (1, 2, self.input_size, self.input_size)

                # resize ab -> concat original l -> rgb
                output_ab_resize = (
                    F.interpolate(output_ab, size=(self.height, self.width))[0]
                    .float()
                    .numpy()
                    .transpose(1, 2, 0)
                )
                output_lab = np.concatenate((orig_l, output_ab_resize), axis=-1)
                output_bgr = cv2.cvtColor(output_lab, cv2.COLOR_LAB2BGR)

                output_img = (output_bgr * 255.0).round().astype(np.uint8)

                return output_img
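
        # Summary of the pipeline above: the input image is converted to CIELAB
        # space and only its L (lightness) channel is kept, resized to
        # input_size x input_size, and fed to DDColor, which predicts the two
        # ab (chroma) channels (num_output_channels=2). The predicted ab map is
        # then upsampled back to the original resolution and recombined with
        # the full-resolution original L channel, so the output keeps the
        # source image's luminance detail and only the color is generated.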

        self.colorizer = ImageColorizationPipeline(
            model_path="checkpoints/ddcolor_modelscope.pth",
            input_size=512,
            model_size="large",
        )
        self.colorizer_tiny = ImageColorizationPipeline(
            model_path="checkpoints/ddcolor_paper_tiny.pth",
            input_size=512,
            model_size="tiny",
        )
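        # Both pipelines stay resident in memory after setup(), so predict()
        # can switch between the large and tiny checkpoints without reloading
        # weights on every request.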

    def predict(
        self,
        image: Path = Input(description="Grayscale input image."),
        model_size: str = Input(
            description="Choose the model size.",
            choices=["large", "tiny"],
            default="large",
        ),
    ) -> Path:
        """Run a single prediction on the model"""
        img = cv2.imread(str(image))
        colorizer = self.colorizer_tiny if model_size == "tiny" else self.colorizer
        image_out = colorizer.process(img)
        out_path = "/tmp/out.png"
        cv2.imwrite(out_path, image_out)
        return Path(out_path)
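

# --- Usage sketch (not part of the original predict.py) ---------------------
# A minimal way to exercise this predictor, assuming the DDColor weights have
# already been placed in ./checkpoints and the `cog` package is installed.
# Through the Cog CLI the equivalent call would be something like:
#   cog predict -i image=@input.jpg -i model_size=large
# The file name "input.jpg" below is a placeholder, not part of the repo.
if __name__ == "__main__":
    predictor = Predictor()
    predictor.setup()  # loads both the large and tiny checkpoints
    out = predictor.predict(image=Path("input.jpg"), model_size="large")
    print(f"Colorized image written to {out}")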