Source code for cuvis_ai.utils.numpy


import numpy as np
from typing import Tuple, Union
import json
import datetime
import os.path as osp
import cv2 as cv
import pycocotools



[docs]
def get_shape_without_batch(array: np.ndarray, ignore=()):
    if isinstance(ignore, int):
        ignore = (ignore,)

    match array:
        case np.ndarray():
            ndim = array.ndim
        case tuple():
            ndim = len(array)
        case list():
            ndim = len(array)
        case _:
            raise ValueError("Unknown input type.")

    if ndim != 3 and ndim != 4:
        raise ValueError("Input array must be 3D or 4D.")

    match array:
        case np.ndarray():
            shape = array.shape if ndim == 3 else array.shape[1:]
        case tuple():
            shape = array if ndim == 3 else array[1:]
        case list():
            shape = array if ndim == 3 else array[1:]
        case _:
            raise ValueError("Unknown input type.")

    return tuple([-1 if i in ignore else shape[i] for i in [0, 1, 2]])




[docs]
def check_array_shape(array: Union[np.ndarray, Tuple[int, int, int]], wanted_shape: Tuple[int, int, int]):
    match array:
        case np.ndarray():
            array_shape = array.shape
        case tuple():
            array_shape = array
        case list():
            array_shape = array
        case _:
            raise ValueError("Unknown input type.")

    ret = True
    for v, w in zip(array_shape, wanted_shape):
        ret &= w == -1 or v == w
    return ret




[docs]
def flatten_spatial(array: np.ndarray):
    if array.ndim == 3:
        # Array is of shape [width, height, channels]
        return array.reshape(-1, array.shape[-1])
    elif array.ndim > 3:
        return array.reshape((*array.shape[:-3], -1, array.shape[-1]))
    else:
        raise ValueError("Input array must be 3D or higher.")




[docs]
def flatten_batch_and_spatial(array: np.ndarray):
    if array.ndim == 3:
        # Array is of shape [width, height, channels]
        return array.reshape(-1, array.shape[2])
    elif array.ndim == 4:
        # Array is of shape [batch, width, height, channels]
        return array.reshape(-1, array.shape[3])
    else:
        raise ValueError("Input array must be 3D or 4D.")




[docs]
def unflatten_batch_and_spatial(array: np.ndarray, orig_shape):
    if array.ndim != 2 and array.ndim != 1:
        raise ValueError("Input array must be 2D or 1D.")
    if array.shape[0] != np.prod(orig_shape[:-1]):
        raise ValueError("Input array and orig shape do not add up.")
    return array.reshape(*orig_shape[:-1], -1)




[docs]
def unflatten_spatial(array: np.ndarray, orig_shape):
    if array.ndim != 3 and array.ndim != 2:
        raise ValueError("Input array must be 2D or 3D.")
    if array.shape[0] != np.prod(orig_shape[:-1]):
        raise ValueError("Input array and orig shape do not add up.")
    return array.reshape(*orig_shape[:-1], -1)




[docs]
def flatten_labels(array: np.ndarray):
    if array.ndim == 2:
        # Array is of shape [width, height]
        return array.reshape(-1)
    elif array.ndim == 3:
        # Array is of shape [batch, width, height]
        return array.reshape(array.shape[0], -1)
    else:
        raise ValueError("Input array must be 2D or 3D.")




[docs]
def unflatten_labels(array: np.ndarray, orig_shape):
    if array.ndim != 1 and array.ndim != 2:
        raise ValueError("Input array must be 1D or 2D.")
    return array.reshape(orig_shape)




[docs]
def flatten_batch_and_labels(array: np.ndarray):
    if array.ndim == 2:
        # Array is of shape [width, height]
        return array.reshape(-1)
    elif array.ndim == 3:
        # Array is of shape [batch, width, height]
        return array.reshape(-1)
    else:
        raise ValueError("Input array must be 2D or 3D.")




[docs]
def unflatten_batch_and_labels(array: np.ndarray, orig_shape):
    if array.ndim != 1:
        raise ValueError("Input array must be 1D.")
    return array.reshape(orig_shape)



[docs]
def binary_mask_to_rle(binary_mask):
    """
    converts a binary mask to RLE shamelessly copied from https://stackoverflow.com/a/76990451
    :param binary_mask:
    :return:
    """
    rle = {"counts": [], "size": list(binary_mask.shape)}
    # fortran order needed to correctly convert to RLE
    flattened_mask = binary_mask.ravel(order="F")
    diff_arr = np.diff(flattened_mask)
    # +1 to "convert" from indices to length
    nonzero_indices = np.where(diff_arr != 0)[0] + 1
    lengths = np.diff(np.concatenate(([0], nonzero_indices, [len(flattened_mask)])))

    # note that the odd counts are always the numbers of zeros
    if flattened_mask[0] == 1:
        lengths = np.concatenate(([0], lengths))

    rle["counts"] = lengths.tolist()

    return rle



[docs]
def gen_coco_labels(mask: np.ndarray, label_names: list, output_dir: str, name: str,img_name: str = None, single_object_per_label: bool = False):
    """
    generating coco labels from numpy image and mask, occluded objects can not be labeled correctly at the moment
    :param mask: mask should be a mask containing zeros for background and integers for labels
    :param label_names: list of labels
    :param output_dir: path where to save the json file, if the output file already exists, the generated labels will be appended
    :param name: name of the json file
    :param img: image for which these labels apply
    :param single_object_per_label: clarify if every object has its own label or if multiple objects share the same label. default = False

    :return:
    """

    now = datetime.datetime.now()
    out_ann_file = osp.realpath(osp.join(output_dir, name + "_annotations.json"))
    # if label file exists load and extend the file, create new data structure otherwise
    if osp.exists(out_ann_file):
        data = json.load(open(out_ann_file, "r"))

    else:
        data = dict(
            info=dict(
                description=None,
                url=None,
                version=None,
                year=now.year,
                contributor=None,
                date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
            ),
            licenses=[
                dict(
                    url=None,
                    id=0,
                    name=None,
                )
            ],
            images=[
                # license, url, file_name, height, width, date_captured, id
            ],
            type="instances",
            annotations=[
                # segmentation, area, iscrowd, image_id, bbox, category_id, id
            ],
            categories=[
                # supercategory, id, name
            ],
        )
        for label_id, label in enumerate(label_names):
            data['categories'].append(
                dict(
                    supercategory=None,
                    id=label_id,
                    name=label,
                )
            )

    image_id = len(data["images"]) + 1

    data["images"].append(
        dict(
            license=0,
            url=None,
            file_name=img_name if img_name else str(image_id),
            height=mask.shape[0],
            width=mask.shape[1],
            date_captured=None,
            id=image_id,
        )
    )
    for label_id, label in enumerate(label_names, start=1):
        label_mask = mask.copy()
        # remove unwanted labels
        label_mask[label_mask != label_id] = 0
        label_mask[label_mask == label_id] = 1
        label_mask = label_mask.astype(np.uint8)

        pts = []
        # if all areas of one label describe the same object put them into one annotation with iscrowd 1 and RLE compressed
        if not single_object_per_label:
            segmentation = binary_mask_to_rle(label_mask)
            # make mask Fortran contiguous as pycocotools expects it that way
            label_mask = np.asfortranarray(label_mask.astype(np.uint8))
            label_mask = pycocotools.mask.encode(label_mask)
            area = float(pycocotools.mask.area(label_mask))
            bbox = pycocotools.mask.toBbox(label_mask).flatten().tolist()
            data["annotations"].append(
                dict(
                    id=len(data["annotations"]),
                    image_id=image_id,
                    category_id=label_id,
                    segmentation=segmentation,
                    area=area,
                    bbox=bbox,
                    iscrowd=1,
                )
            )
        # if every contour is a different object but may be of the same "class" save every contour as an annotation and
        else:
            contours, _ = cv.findContours(label_mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
            for el in contours:
                contour = el.squeeze()
                es = el.shape
                pts.append(el.reshape((es[0], es[-1])))
                # make contour Fortran contiguous as pycocotools expects it that way
                contour = np.asfortranarray(contour.astype(np.uint8))
                contour = pycocotools.mask.encode(contour)
                area = float(pycocotools.mask.area(contour))
                bbox = pycocotools.mask.toBbox(contour).flatten().tolist()

                data["annotations"].append(
                    dict(
                        id=len(data["annotations"]),
                        image_id=image_id,
                        category_id=label_id,
                        segmentation=contour,
                        area=area,
                        bbox=bbox,
                        iscrowd=0,
                    )
                )

    with open(out_ann_file, "w") as f:
        json.dump(data, f)