数据集:

KETI-AIR/coco

语言:

en

大小:

100K<n<1M

许可:

apache-2.0
中文

COCO dataset loader based on the TensorFlow Datasets `coco` dataset.

Object Detection

import os
from datasets import load_dataset
from PIL import Image, ImageFont, ImageDraw, ImageColor

def calc_lum(rgb):
    """Return a weighted brightness value for an RGB colour.

    Args:
        rgb: indexable colour whose first three items are the red, green and
            blue components (any further items, e.g. alpha, are ignored).

    Returns:
        The weighted sum ``0.2126*R + 0.7152*G + 0.0722*B``. The weights match
        the Rec. 709 luma coefficients, so green contributes the most.
    """
    red, green, blue = rgb[0], rgb[1], rgb[2]
    return 0.2126 * red + 0.7152 * green + 0.0722 * blue

# Palette used to colour objects by label id: one colour tuple per entry of
# Pillow's named-colour table, in that table's iteration order.
COLOR_MAP = [
    ImageColor.getrgb(color_code)
    for color_code in ImageColor.colormap.values()
]

def get_text_bbox(bb, tbb, margin, im_w, im_h, anchor="leftBottom"):
    """Place a label box (and its text origin) at a corner of an object box.

    The label is put *inside* the object box at the anchor corner when the
    object is comfortably larger than the text; otherwise (text width * 3
    exceeds the box width, or text height * 4 exceeds the box height) it is
    put *outside* the box, above it for top anchors and below it for bottom
    anchors. Coordinates are clamped against the image bounds.

    Args:
        bb: object box as ``(left, top, right, bottom)``.
        tbb: text box as ``(left, top, right, bottom)``, e.g. the result of
            ``font.getbbox(text)``. The label is sized from its right/bottom
            values, not from its width/height.
        margin: padding added on every side of the text inside the label.
        im_w: image width, used as the upper clamp for x coordinates.
        im_h: image height, used as the upper clamp for y coordinates.
        anchor: one of ``"leftTop"``, ``"rightTop"``, ``"rightBottom"``,
            ``"leftBottom"`` or ``"centerBottom"``.

    Returns:
        ``((x1, y1, x2, y2), (text_x, text_y))``: the label rectangle and the
        position at which the text should be drawn.

    Raises:
        ValueError: if ``anchor`` is not one of the supported names
            (previously the function silently returned ``None``).
    """
    m = margin
    l, t, r, b = bb
    tl, tt, tr, tb = tbb
    bbw, bbh = r - l, b - t
    tbbw, tbbh = tr - tl, tb - tt

    # True when the object box is too small to host the label inside it.
    outside = tbbw*3 > bbw or tbbh*4 > bbh

    if anchor in ("leftTop", "rightTop"):
        # Vertical placement: above the box when outside, else flush with its top.
        y1 = max(t - tb - 2*m, 0) if outside else max(t, 0)
        y2 = min(y1 + tb + 2*m, im_h)
        if anchor == "leftTop":
            x1 = max(l, 0)
            x2 = min(x1 + tr + 2*m, im_w)
        else:
            x2 = max(r, 0)
            x1 = max(x2 - tr - 2*m, 0)
    elif anchor in ("rightBottom", "leftBottom", "centerBottom"):
        # Vertical placement: below the box when outside, else flush with its bottom.
        y2 = min(b + tb + 2*m, im_h) if outside else max(b, 0)
        y1 = max(y2 - tb - 2*m, 0)
        if anchor == "rightBottom":
            x2 = min(r, im_w)
            x1 = max(x2 - tr - 2*m, 0)
        else:
            if anchor == "leftBottom":
                x1 = min(l, im_w)
            else:
                # Centre the label horizontally on the middle of the box.
                x1 = min((l+r)//2 - tr//2 - m, im_w)
            x2 = min(x1 + tr + 2*m, im_w)
    else:
        raise ValueError(f"unsupported anchor: {anchor!r}")

    return ((x1, y1, x2, y2), (max(x1+m, 0), max(y1+m, 0)))

def draw_bbox(image, objects, out_path, label_names=None, font="Roboto-Bold.ttf", fontsize=15, fill=True, opacity=60, width=2, margin=3, anchor="leftBottom"):
    """Draw object boxes (and optional class labels) on an image and save it.

    Args:
        image: PIL image to annotate; it is converted to RGBA, not modified.
        objects: mapping with parallel sequences ``objects["bbox"]`` (boxes
            indexed as ``bb[0]..bb[3]`` and passed to ``ImageDraw.rectangle``
            as the two corner points) and ``objects["label"]`` (integer ids).
        out_path: destination passed to ``Image.save``.
        label_names: optional sequence mapping label id -> display name. When
            ``None`` no text is drawn and no font is loaded.
        font: TrueType font file used for the labels.
        fontsize: font size passed to ``ImageFont.truetype``.
        fill: whether to tint the inside of each box with its colour.
        opacity: alpha value (0-255) of that tint.
        width: outline width of each box.
        margin: padding around the label text.
        anchor: label placement, forwarded to ``get_text_bbox``.
    """
    # Load the font only when labels will actually be drawn, so that a missing
    # font file does not break calls that never render text.
    fnt = ImageFont.truetype(font, fontsize) if label_names is not None else None
    im_w, im_h = image.size

    img = image.convert("RGBA")
    for bb, lbl_id in zip(objects["bbox"], objects["label"]):
        # Each object gets its own transparent layer so that translucent
        # fills of overlapping boxes blend on top of each other.
        overlay = Image.new('RGBA', img.size, (0, 0, 0, 0))
        draw = ImageDraw.Draw(overlay)

        # Label ids beyond the palette share its last colour.
        c = COLOR_MAP[min(lbl_id, len(COLOR_MAP)-1)]
        fill_c = c + (opacity, ) if fill else None
        draw.rectangle((bb[0], bb[1], bb[2], bb[3]), outline=c, fill=fill_c, width=width)

        if label_names is not None:
            text = label_names[lbl_id]
            tbb = fnt.getbbox(text)
            btn_bbox, text_pos = get_text_bbox(bb, tbb, margin, im_w, im_h, anchor)
            # Black text on bright label backgrounds, white text on dark ones.
            fc = (0, 0, 0) if calc_lum(c) > 150 else (255, 255, 255)
            draw.rectangle(btn_bbox, outline=c, fill=c + (255, ))
            draw.text(text_pos, text, font=fnt, fill=fc + (255, ))

        img = Image.alpha_composite(img, overlay)

    img = img.convert("RGB")
    img.save(out_path)

# Build the "2017" configuration from the local "coco.py" loader script,
# caching the prepared data under ./huggingface_datasets.
raw_datasets = load_dataset("coco.py", "2017", cache_dir="./huggingface_datasets")

train_dataset = raw_datasets["train"]
# Class names, indexed by the integer ids stored in objects["label"].
label_list = train_dataset.features["objects"].feature['label'].names

# Annotate at most the first 10 training images; each result is saved under
# the image's own file name.
for idx, item in zip(range(10), train_dataset):
    draw_bbox(
        item["image"],
        item["objects"],
        item["image/filename"],
        label_names=label_list,
    )

Panoptic segmentation

import numpy as np
from datasets import load_dataset
from PIL import Image, ImageFont, ImageDraw, ImageColor
from transformers.image_transforms import (
    rgb_to_id,
)

def calc_lum(rgb):
    """Return a weighted brightness value for an RGB colour.

    Args:
        rgb: indexable colour whose first three items are the red, green and
            blue components (any further items, e.g. alpha, are ignored).

    Returns:
        The weighted sum ``0.2126*R + 0.7152*G + 0.0722*B``. The weights match
        the Rec. 709 luma coefficients, so green contributes the most.
    """
    red, green, blue = rgb[0], rgb[1], rgb[2]
    return 0.2126 * red + 0.7152 * green + 0.0722 * blue

# Palette used to colour segments by label id: one colour tuple per entry of
# Pillow's named-colour table, in that table's iteration order.
COLOR_MAP = [
    ImageColor.getrgb(color_code)
    for color_code in ImageColor.colormap.values()
]

def get_text_bbox(bb, tbb, margin, im_w, im_h, anchor="leftBottom"):
    """Place a label box (and its text origin) at a corner of an object box.

    The label is put *inside* the object box at the anchor corner when the
    object is comfortably larger than the text; otherwise (text width * 3
    exceeds the box width, or text height * 4 exceeds the box height) it is
    put *outside* the box, above it for top anchors and below it for bottom
    anchors. Coordinates are clamped against the image bounds.

    Args:
        bb: object box as ``(left, top, right, bottom)``.
        tbb: text box as ``(left, top, right, bottom)``, e.g. the result of
            ``font.getbbox(text)``. The label is sized from its right/bottom
            values, not from its width/height.
        margin: padding added on every side of the text inside the label.
        im_w: image width, used as the upper clamp for x coordinates.
        im_h: image height, used as the upper clamp for y coordinates.
        anchor: one of ``"leftTop"``, ``"rightTop"``, ``"rightBottom"``,
            ``"leftBottom"`` or ``"centerBottom"``.

    Returns:
        ``((x1, y1, x2, y2), (text_x, text_y))``: the label rectangle and the
        position at which the text should be drawn.

    Raises:
        ValueError: if ``anchor`` is not one of the supported names
            (previously the function silently returned ``None``).
    """
    m = margin
    l, t, r, b = bb
    tl, tt, tr, tb = tbb
    bbw, bbh = r - l, b - t
    tbbw, tbbh = tr - tl, tb - tt

    # True when the object box is too small to host the label inside it.
    outside = tbbw*3 > bbw or tbbh*4 > bbh

    if anchor in ("leftTop", "rightTop"):
        # Vertical placement: above the box when outside, else flush with its top.
        y1 = max(t - tb - 2*m, 0) if outside else max(t, 0)
        y2 = min(y1 + tb + 2*m, im_h)
        if anchor == "leftTop":
            x1 = max(l, 0)
            x2 = min(x1 + tr + 2*m, im_w)
        else:
            x2 = max(r, 0)
            x1 = max(x2 - tr - 2*m, 0)
    elif anchor in ("rightBottom", "leftBottom", "centerBottom"):
        # Vertical placement: below the box when outside, else flush with its bottom.
        y2 = min(b + tb + 2*m, im_h) if outside else max(b, 0)
        y1 = max(y2 - tb - 2*m, 0)
        if anchor == "rightBottom":
            x2 = min(r, im_w)
            x1 = max(x2 - tr - 2*m, 0)
        else:
            if anchor == "leftBottom":
                x1 = min(l, im_w)
            else:
                # Centre the label horizontally on the middle of the box.
                x1 = min((l+r)//2 - tr//2 - m, im_w)
            x2 = min(x1 + tr + 2*m, im_w)
    else:
        raise ValueError(f"unsupported anchor: {anchor!r}")

    return ((x1, y1, x2, y2), (max(x1+m, 0), max(y1+m, 0)))

# Adapted from transformers.models.detr.image_processing_detr.masks_to_boxes
def masks_to_boxes(masks: np.ndarray) -> np.ndarray:
    """
    Compute the tight bounding box of every mask in a stack of binary masks.

    Args:
        masks: array of shape `[number_masks, height, width]`; non-zero
            entries mark the pixels that belong to each mask.
    Returns:
        boxes: array of shape `[number_masks, 4]` holding
            `(x_min, y_min, x_max, y_max)` pixel indices per mask. An input
            with no elements yields an empty `(0, 4)` array. A mask without
            any set pixel gets `1e8` as its minima and `0` as its maxima.
    """
    if masks.size == 0:
        return np.zeros((0, 4))

    height, width = masks.shape[-2:]
    # see https://github.com/pytorch/pytorch/issues/50276
    row_idx, col_idx = np.meshgrid(
        np.arange(0, height, dtype=np.float32),
        np.arange(0, width, dtype=np.float32),
        indexing="ij",
    )

    def _extent(coords):
        # Coordinate of every set pixel; unset pixels become 0.
        weighted = masks * np.expand_dims(coords, axis=0)
        count = weighted.shape[0]
        upper = weighted.reshape(count, -1).max(-1)
        # For the minimum, unset pixels must not win with their 0, so they
        # are replaced by a huge sentinel before reducing.
        hidden = ~(np.array(masks, dtype=bool))
        lower = np.ma.array(weighted, mask=hidden).filled(fill_value=1e8)
        lower = lower.reshape(lower.shape[0], -1).min(-1)
        return lower, upper

    x_min, x_max = _extent(col_idx)
    y_min, y_max = _extent(row_idx)

    return np.stack([x_min, y_min, x_max, y_max], 1)

def draw_seg(image, panoptic_image, oids, labels, out_path, label_names=None, font="Roboto-Bold.ttf", fontsize=15, opacity=160, anchor="leftBottom", margin=3):
    """Overlay panoptic segments (and optional class labels) on an image and save it.

    Args:
        image: PIL image to annotate; it is converted to RGBA, not modified.
        panoptic_image: segmentation image whose RGB values encode segment
            ids (decoded with ``rgb_to_id``).
        oids: segment ids to draw, one per segment.
        labels: integer class id of each segment, parallel to ``oids``.
        out_path: destination passed to ``Image.save``.
        label_names: optional sequence mapping class id -> display name. When
            ``None`` no text is drawn and no font is loaded.
        font: TrueType font file used for the labels.
        fontsize: font size passed to ``ImageFont.truetype``.
        opacity: alpha value (0-255) of the coloured segment overlay.
        anchor: label placement, forwarded to ``get_text_bbox``.
        margin: padding around the label text (defaults to the previously
            hard-coded value of 3).
    """
    # Load the font only when labels will actually be drawn, so that a missing
    # font file does not break calls that never render text.
    fnt = ImageFont.truetype(font, fontsize) if label_names is not None else None
    im_w, im_h = image.size

    # Decode the colour-coded segmentation into one integer id per pixel.
    masks = np.asarray(panoptic_image, dtype=np.uint32)
    masks = rgb_to_id(masks)

    # Broadcast against the requested ids: one 0/1 mask per segment.
    oids = np.array(oids, dtype=np.uint32)
    masks = masks == oids[:, None, None]
    masks = masks.astype(np.uint8)

    bboxes = masks_to_boxes(masks)

    img = image.convert("RGBA")

    for label, mask, bbox in zip(labels, masks, bboxes):
        # A segment id that never occurs in the image has no pixels; its
        # overlay would be empty and its box meaningless, so skip it.
        if not mask.any():
            continue

        # Class ids beyond the palette share its last colour.
        c = COLOR_MAP[min(label, len(COLOR_MAP)-1)]
        cf = np.array(c + (opacity, )).astype(np.uint8)
        # RGBA layer: segment colour where the mask is set, transparent elsewhere.
        cmask = mask[:, :, None] * cf[None, None, :]
        cmask = Image.fromarray(cmask)
        img = Image.alpha_composite(img, cmask)

        if label_names is not None:
            text = label_names[label]
            tbb = fnt.getbbox(text)
            btn_bbox, text_pos = get_text_bbox(bbox, tbb, margin, im_w, im_h, anchor=anchor)

            overlay = Image.new('RGBA', img.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(overlay)

            # Black text on bright label backgrounds, white text on dark ones.
            fc = (0, 0, 0) if calc_lum(c) > 150 else (255, 255, 255)

            draw.rectangle(btn_bbox, outline=c, fill=c + (255, ))
            draw.text(text_pos, text, font=fnt, fill=fc + (255, ))

            img = Image.alpha_composite(img, overlay)

    img = img.convert("RGB")
    img.save(out_path)



# Build the "2017_panoptic" configuration from the local "coco.py" loader
# script, caching the prepared data under ./huggingface_datasets.
# (Optionally pass data_dir="./data" as an extra argument.)
raw_datasets = load_dataset("coco.py", "2017_panoptic", cache_dir="./huggingface_datasets")

train_dataset = raw_datasets["train"]
# Class names, indexed by the integer ids stored in panoptic_objects["label"].
label_list = train_dataset.features["panoptic_objects"].feature['label'].names

# Annotate at most the first 10 training images; each result is saved as
# "panoptic_<original file name>".
for idx, item in zip(range(10), train_dataset):
    segments = item["panoptic_objects"]
    draw_seg(
        item["image"],
        item["panoptic_image"],
        segments["id"],
        segments["label"],
        "panoptic_" + item["image/filename"],
        label_names=label_list,
    )