NVIDIA DALI从入门到放弃之五：Image Processing-CFANZ编程社区

NVIDIA DALI从入门到放弃之一：概述

NVIDIA DALI从入门到放弃之二：入门示例

NVIDIA DALI从入门到放弃之三：Data Loading

NVIDIA DALI从入门到放弃之四：Multiple GPU

NVIDIA DALI从入门到放弃之五：Image Processing

NVIDIA DALI从入门到放弃之六：Geometric Transforms

NVIDIA DALI从入门到放弃之七：Sequence Processing

NVIDIA DALI从入门到放弃之八：PyTorch Plugin API

1 Image Decoder

1-1 CPU

class ImageDecoderCropPipeline(Pipeline):
    """Read image files and decode a 224x224 crop of each one on the CPU.

    The crop position along each axis is drawn per run from a uniform
    generator over (0.0, 1.0). The file root (``image_dir``) and the pipeline
    ``seed`` are names defined outside this class.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=seed)
        self.input = ops.FileReader(file_root=image_dir)
        # One independent generator per axis, both over the same range.
        unit_range = (0.0, 1.0)
        self.pos_rng_x = ops.random.Uniform(range=unit_range)
        self.pos_rng_y = ops.random.Uniform(range=unit_range)
        self.decode = ops.ImageDecoderCrop(
            device='cpu',
            output_type=types.RGB,
            crop=(224, 224),
        )

    def define_graph(self):
        """Connect reader and crop-decoder; return ``(images, labels)``."""
        encoded, labels = self.input()
        cropped = self.decode(
            encoded,
            crop_pos_x=self.pos_rng_x(),
            crop_pos_y=self.pos_rng_y(),
        )
        return (cropped, labels)

# Build the CPU crop-decoding pipeline with num_threads=1 and device_id=0.
pipe = ImageDecoderCropPipeline(batch_size, 1, 0)
pipe.build()
# Run one iteration; the second output (labels) is discarded.
images, _ = pipe.run()
# show_images is a helper defined outside this snippet.
show_images( images )

1-2 GPU

import numpy as np

class ExternalInputIterator(object):
    """Endless iterator producing batches of fixed crop positions and sizes.

    Every step returns ``(pos, size)``: two lists holding ``batch_size``
    float32 arrays each, where each ``pos`` entry is ``[0.4, 0.2]`` and each
    ``size`` entry is ``[0.3, 0.5]``. ``StopIteration`` is never raised.
    """

    def __init__(self, batch_size):
        self.batch_size = batch_size

    def __iter__(self):
        # (Re)start the internal counter; it wraps around at the batch size.
        self.i, self.n = 0, self.batch_size
        return self

    def __next__(self):
        count = self.batch_size
        anchors = [np.asarray([0.4, 0.2], dtype=np.float32) for _ in range(count)]
        extents = [np.asarray([0.3, 0.5], dtype=np.float32) for _ in range(count)]
        # Advance the wrap-around counter once per generated sample.
        for _ in range(count):
            self.i = (self.i + 1) % self.n
        return (anchors, extents)

    # Alias so callers may also use the ``iterator.next()`` spelling.
    next = __next__

# Create the crop-parameter source and take its iterator; calling iter()
# sets the counter attributes that __next__ updates.
eii = ExternalInputIterator(batch_size)
pos_size_iter = iter(eii)

class ImageDecoderSlicePipeline(Pipeline):
    """Decode a slice of each image using the ``mixed`` decoder backend.

    The crop position and crop size are not fixed at construction time: they
    are supplied on every iteration through two ``ExternalSource`` operators,
    with values pulled from the module-level ``pos_size_iter`` iterator.
    ``image_dir`` and ``seed`` are names defined outside this class.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super(ImageDecoderSlicePipeline, self).__init__(batch_size, num_threads, device_id, seed = seed)

        self.input = ops.FileReader(file_root = image_dir)
        # One external source per decoder argument input (position, size).
        self.input_crop_pos = ops.ExternalSource()
        self.input_crop_size = ops.ExternalSource()
        self.decode = ops.ImageDecoderSlice(device = 'mixed', output_type = types.RGB)

    def define_graph(self):
        """Connect reader, external sources and decoder; return ``(images, labels)``."""
        jpegs, labels = self.input()
        # Keep the graph nodes on self so iter_setup can feed them.
        self.crop_pos = self.input_crop_pos()
        self.crop_size = self.input_crop_size()
        images = self.decode(jpegs, self.crop_pos, self.crop_size)
        return (images, labels)

    def iter_setup(self):
        """Feed the next batch of crop positions and sizes to the external sources."""
        # The builtin next() works with any iterator, not only ones that
        # expose a Python-2 style ``.next()`` alias.
        crop_pos, crop_size = next(pos_size_iter)
        self.feed_input(self.crop_pos, crop_pos)
        self.feed_input(self.crop_size, crop_size)

# Build the slice-decoding pipeline (num_threads=1, device_id=0) and run one iteration.
pipe = ImageDecoderSlicePipeline(batch_size, 1, 0)
pipe.build()
images, _ = pipe.run()
# The decoder uses device='mixed'; as_cpu() is called on the batch before display.
show_images( images.as_cpu() )

2 Color Space Conversion

2-1 CPU

class ColorCpuPipeline(Pipeline):
    """Decode images on the CPU and convert them to BGR, YCbCr and grayscale.

    ``define_graph`` returns the decoded RGB batch followed by the three
    converted batches, in the order BGR, YCbCr, GRAY. ``image_dir`` is a name
    defined outside this class.
    """

    def __init__(self, batch_size, num_threads, device_id):
        # Zero-argument super() always binds to this class, so the call cannot
        # drift out of sync with the class name.
        super().__init__(batch_size, num_threads, device_id, seed = 12)
        self.input = ops.FileReader(file_root = image_dir)
        self.decode = ops.ImageDecoder(device = 'cpu', output_type = types.RGB)
        # Conversion operators keyed by "<source>2<target>".
        self.conversions = {}
        self.conversions['rgb2bgr'] = ops.ColorSpaceConversion(device = 'cpu', image_type = types.RGB, output_type = types.BGR)
        self.conversions['rgb2ycbcr'] = ops.ColorSpaceConversion(device = 'cpu', image_type = types.RGB, output_type = types.YCbCr)
        self.conversions['rgb2gray'] = ops.ColorSpaceConversion(device = 'cpu', image_type = types.RGB, output_type = types.GRAY)

    def define_graph(self):
        """Return ``[rgb, bgr, ycbcr, gray]`` graph outputs."""
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)
        outputs = [images]
        # Apply every conversion to the same decoded batch; the explicit key
        # order fixes the order of the returned outputs.
        outs = [
            self.conversions[name](images)
            for name in ('rgb2bgr', 'rgb2ycbcr', 'rgb2gray')
        ]
        return outputs + outs

2-2 GPU

class ColorGpuPipeline(Pipeline):
    """Decode images with the ``mixed`` backend and convert them on the GPU.

    ``define_graph`` returns the decoded RGB batch followed by the three
    converted batches, in the order BGR, YCbCr, GRAY. ``image_dir`` is a name
    defined outside this class.
    """

    def __init__(self, batch_size, num_threads, device_id):
        # Zero-argument super() always binds to this class, so the call cannot
        # drift out of sync with the class name.
        super().__init__(batch_size, num_threads, device_id, seed = 12)
        self.input = ops.FileReader(file_root = image_dir)
        self.decode = ops.ImageDecoder(device="mixed", output_type = types.RGB)
        # Conversion operators keyed by "<source>2<target>".
        self.conversions = {}
        self.conversions['rgb2bgr'] = ops.ColorSpaceConversion(device = 'gpu', image_type = types.RGB, output_type = types.BGR)
        self.conversions['rgb2ycbcr'] = ops.ColorSpaceConversion(device = 'gpu', image_type = types.RGB, output_type = types.YCbCr)
        self.conversions['rgb2gray'] = ops.ColorSpaceConversion(device = 'gpu', image_type = types.RGB, output_type = types.GRAY)

    def define_graph(self):
        """Return ``[rgb, bgr, ycbcr, gray]`` graph outputs."""
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)
        outputs = [images.gpu()]
        # Apply every conversion to the same decoded batch; the explicit key
        # order fixes the order of the returned outputs.
        outs = [
            self.conversions[name](images.gpu())
            for name in ('rgb2bgr', 'rgb2ycbcr', 'rgb2gray')
        ]
        return outputs + outs

3 BrightnessContrast

3-1 CPU

class BCCpuPipeline(Pipeline):
    """CPU pipeline that decodes images and applies a brightness/contrast operator.

    Files are read from ``image_filename``, a name defined outside this class.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        adjustment = dict(brightness_shift=0.3, contrast=0.4, contrast_center=100)
        self.bc = ops.BrightnessContrast(device="cpu", **adjustment)

    def define_graph(self):
        """Return the decoded batch together with its adjusted counterpart."""
        encoded, _ = self.input()
        decoded = self.decode(encoded)
        return decoded, self.bc(decoded)

# Build the CPU brightness/contrast pipeline and run one iteration.
pipecpu = BCCpuPipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipecpu.build()
# cpu_output holds both pipeline outputs (original and adjusted batch).
cpu_output = pipecpu.run()

3-2 GPU

class BCGpuPipeline(Pipeline):
    """Pipeline that decodes with the ``mixed`` backend and adjusts brightness/contrast on the GPU.

    Files are read from ``image_filename``, a name defined outside this class.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        adjustment = dict(contrast=1.5, brightness_shift=1, brightness=-1)
        self.bc = ops.BrightnessContrast(device="gpu", **adjustment)

    def define_graph(self):
        """Return the decoded batch together with its adjusted counterpart."""
        encoded, _ = self.input()
        decoded = self.decode(encoded)
        adjusted = self.bc(decoded.gpu())
        return decoded, adjusted
        
# Build the GPU brightness/contrast pipeline and run one iteration.
pipegpu = BCGpuPipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipegpu.build()
# gpu_output holds both pipeline outputs (original and adjusted batch).
gpu_output = pipegpu.run()

4 HSV

4-1 CPU

class HsvCpuPipeline(Pipeline):
    """CPU pipeline that decodes images and applies an HSV adjustment.

    ``define_graph`` returns both the decoded batch and the adjusted batch.
    Files are read from ``image_filename``, a name defined outside this class.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        self.hsv = ops.Hsv(device="cpu", hue=120, saturation=1, value=0.4)

    def define_graph(self):
        """Return ``(image, converted)``: the decoded batch and its HSV-adjusted version."""
        read, _ = self.input()
        image = self.decode(read)
        converted = self.hsv(image)
        # Both batches are outputs; without `converted` here the Hsv
        # operator's result would never leave the pipeline.
        return image, converted

4-2 GPU

class HsvGpuPipeline(Pipeline):
    """Pipeline that decodes with the ``mixed`` backend and applies an HSV adjustment on the GPU.

    ``define_graph`` returns both the decoded batch and the adjusted batch.
    Files are read from ``image_filename``, a name defined outside this class.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.hsv = ops.Hsv(device="gpu", hue=120, saturation=2, value=1)

    def define_graph(self):
        """Return ``(image, converted)``: the decoded batch and its HSV-adjusted version."""
        read, _ = self.input()
        image = self.decode(read)
        converted = self.hsv(image.gpu())
        # Both batches are outputs; without `converted` here the Hsv
        # operator's result would never leave the pipeline.
        return image, converted

5 Resize

Scaling modes

“default” - the specified dimensions are scaled to the requested size; the missing extents are calculated by applying the average scale of the provided extents - for 2D with one extent specified, this means that the aspect ratio is preserved

“stretch” - the specified dimensions are scaled to the requested size; the missing ones are not scaled at all

“not_larger” - the image is scaled so that no dimension exceeds the specified size; aspect ratio is preserved

“not_smaller” - the image is scaled so that no dimension is smaller than specified (additionally, the max_size argument may be used to limit upscaling of inputs with very high aspect ratios)

batch_size = 8

# Positional arguments after batch_size are presumably num_threads=3 and
# device_id=0 (same order the Pipeline subclasses in this file use) - confirm.
pipe = dali.pipeline.Pipeline(batch_size, 3, 0)
with pipe:
    # Read encoded files and labels from the Caffe database at db_folder
    # (defined outside this snippet), shuffled with a fixed seed.
    files, labels = dali.fn.caffe_reader(path = db_folder, random_shuffle = True, seed = 1234)
    images = dali.fn.image_decoder(files, device = "mixed")

    # Requested output extents shared by every resize variant below.
    width = 200
    height = 100

    # One pipeline output per combination of specified extents and scaling mode.
    out = [
          # No explicit mode: both extents, width only, height only.
          dali.fn.resize(images, size=[height, width]),
          dali.fn.resize(images, resize_x=width),
          dali.fn.resize(images, resize_y=height),
          # mode="stretch": both extents, width only, height only.
          dali.fn.resize(images, size=[height, width], mode="stretch"),
          dali.fn.resize(images, resize_x=width, mode="stretch"),
          dali.fn.resize(images, resize_y=height, mode="stretch"),
          # Aspect-preserving modes with both extents given.
          dali.fn.resize(images, size=[height, width], mode="not_larger"),
          dali.fn.resize(images, size=[height, width], mode="not_smaller"),
          # Same as above, with max_size passed to limit the result.
          dali.fn.resize(images, size=[height, width], mode="not_smaller", max_size=[110,200]),
    ]

    pipe.set_outputs(*out)

pipe.build()
# Run one iteration and keep all outputs.
pipe_out = pipe.run()

6 WarpAffine

NVIDIA DALI从入门到放弃之五：Image Processing_2d

def random_transform(index):
    """Return a random 2x3 affine matrix that maps the point (200, 200) to itself.

    The matrix is the product ``T2 @ M @ T1`` in homogeneous coordinates,
    where ``T1`` moves the destination centre to the origin, ``M`` is the
    identity perturbed by four uniform draws from [-0.5, 0.5), and ``T2``
    moves the origin to the source centre. ``index`` is accepted but unused.
    """
    out_cx, out_cy = 200, 200
    in_cx, in_cy = 200, 200

    def jitter():
        return np.random.uniform(-0.5, 0.5)

    # Shift output coordinates so (out_cx, out_cy) becomes the origin.
    to_origin = np.array([
        [1, 0, -out_cx],
        [0, 1, -out_cy],
        [0, 0, 1],
    ])

    # Draw the four perturbations in row-major order of the 2x2 linear part.
    scale_x = 1 + jitter()
    shear_x = jitter()
    shear_y = jitter()
    scale_y = 1 + jitter()
    distortion = np.array([
        [scale_x, shear_x, 0],
        [shear_y, scale_y, 0],
        [0, 0, 1],
    ])

    # Shift the origin back to (in_cx, in_cy) in input coordinates.
    from_origin = np.array([
        [1, 0, in_cx],
        [0, 1, in_cy],
        [0, 0, 1],
    ])

    combined = from_origin @ (distortion @ to_origin)

    # The homogeneous bottom row is not part of an affine matrix; drop it.
    return combined[:2, :3]


def gen_transforms(batch_size, single_transform_fn):
    """Stack ``batch_size`` 2x3 matrices into one float32 array.

    ``single_transform_fn`` is called once per sample with the sample index
    (0 .. batch_size - 1, in order) and its result is written into that
    sample's 2x3 slot. Returns an array of shape ``(batch_size, 2, 3)``.
    """
    batch = np.zeros((batch_size, 2, 3))
    # Iterating the first axis yields writable views, one per sample.
    for idx, slot in enumerate(batch):
        slot[...] = single_transform_fn(idx)
    return batch.astype(np.float32)

# Seed NumPy's global random generator so the np.random.uniform draws used
# for the transform matrices are repeatable.
np.random.seed(seed = 123)

class ExamplePipeline(Pipeline):
    """WarpAffine demo pipeline driven by externally supplied warp matrices.

    Images are read from a Caffe LMDB container (``db_folder``, defined
    outside this class), decoded on the CPU and warped by three differently
    configured ``WarpAffine`` operators. The matrices are generated in
    ``iter_setup`` and fed through an ``ExternalSource``.
    """

    def __init__(self, batch_size, num_threads, device_id, pipelined = True, exec_async = True):
        super().__init__(
            batch_size,
            num_threads,
            device_id,
            seed=12,
            exec_pipelined=pipelined,
            exec_async=exec_async,
        )

        # Raw files come from storage - here, a Caffe LMDB container.
        self.input = ops.CaffeReader(path=db_folder, random_shuffle=True)
        # Decoding turns raw file tensors into images (3D tensors, HWC layout).
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

        # Warp matrices are provided from outside via an ExternalSource.
        self.transform_source = ops.ExternalSource()
        self.iter = 0

        # GPU warp to a fixed 400x400 output with linear interpolation.
        # `fill_value` is left unset, which results in source coordinate clamping.
        self.warp_gpu = ops.WarpAffine(
            device="gpu",
            size=(400, 400),
            interp_type=types.INTERP_LINEAR,
        )

        # CPU warp to a fixed 400x400 output with nearest-neighbour interpolation.
        self.warp_cpu = ops.WarpAffine(
            device="cpu",
            fill_value=200,
            size=(400, 400),
            interp_type=types.INTERP_NN,
        )

        # GPU warp with `size` left unset: keeps the original canvas size.
        self.warp_keep_size = ops.WarpAffine(
            device="gpu",
            interp_type=types.INTERP_LINEAR,
        )

    def define_graph(self):
        """Tie the operators together and return labels, matrices and image outputs."""
        self.transform = self.transform_source()
        self.jpegs, self.labels = self.input()
        decoded = self.decode(self.jpegs)

        image_outputs = [
            decoded.gpu(),
            # Transform parameters passed through GPU memory.
            self.warp_gpu(decoded.gpu(), self.transform.gpu()),
            # Transform passed through a named input.
            self.warp_cpu(decoded, matrix=self.transform).gpu(),
            self.warp_keep_size(decoded.gpu(), self.transform.gpu()),
        ]

        return [self.labels, self.transform] + image_outputs

    def iter_setup(self):
        """Generate this batch's matrices and feed them to the ExternalSource."""
        matrices = gen_transforms(self.batch_size, random_transform)
        self.feed_input(self.transform, matrices)

batch_size = 32
# Two worker threads on device 0; pipelined/exec_async keep their defaults (True).
pipe = ExamplePipeline(batch_size=batch_size, num_threads=2, device_id = 0)
pipe.build()
# Run one iteration and keep all outputs.
pipe_out = pipe.run()

7 3D Transforms

# Single-sample pipeline; GetData (defined outside this snippet) supplies the input.
pipe = Pipeline(batch_size = 1, num_threads = 3, device_id = 0, exec_pipelined = True, exec_async = True)
with pipe:
    # The external input is declared with "DHWC" layout and device "gpu".
    data = fn.external_source(source=GetData, device="gpu", layout="DHWC")

    # Resize along Z only: just resize_z is given and the mode is "stretch".
    resized = fn.resize(
            data,
            resize_z = 224,
            mode = "stretch",  # scale only Z axis
            interp_type = types.INTERP_LANCZOS3
        )

    # Random rotation parameters: an angle from (-20, 20) with a fixed seed,
    # and a 3-element axis with components from (-1, 1).
    angle = fn.random.uniform(range=(-20, 20), seed=123)
    axis = fn.random.uniform(range=(-1,1), shape=[3])
    rotated = fn.rotate(
            resized,
            angle = angle,
            axis = axis,
            interp_type = types.INTERP_LINEAR,
            fill_value = 0  # force out-of-bounds pixels to 0
        )

    # Affine warp of the resized input; the 12 matrix values are written as
    # 3 rows of 4 (presumably a 3x4 matrix for 3D data - confirm).
    warped = fn.warp_affine(
            resized,
            size = (200, 320, 320),
            matrix = (
                1, 1, 0,   -180,
                0, 1, 0.2, -20,
                0, 0, 1,   10
            ),
            interp_type = types.INTERP_LINEAR,
            fill_value = 0  # force out-of-bounds pixels to 0
        )

    # Expose the inputs, every transformed result and the random parameters.
    pipe.set_outputs(data, resized, rotated, warped, axis, angle)

pipe.build()
# Run one iteration and keep all outputs.
pipe_out = pipe.run()