0
点赞
收藏
分享

微信扫一扫

NVIDIA DALI从入门到放弃之五:Image Processing

天蓝Sea 2022-08-08 阅读 74

NVIDIA DALI从入门到放弃之一:概述

NVIDIA DALI从入门到放弃之二:入门示例

NVIDIA DALI从入门到放弃之三:Data Loading

NVIDIA DALI从入门到放弃之四:Multiple GPU

NVIDIA DALI从入门到放弃之五:Image Processing

NVIDIA DALI从入门到放弃之六:Geometric Transforms

NVIDIA DALI从入门到放弃之七:Sequence Processing

NVIDIA DALI从入门到放弃之八:PyTorch Plugin API

1 Image Decoder

1-1 CPU

class ImageDecoderCropPipeline(Pipeline):
    """Decode images on the CPU and crop them in the same operator.

    Files are read from the module-level ``image_dir`` and the pipeline is
    seeded with the module-level ``seed``. The crop window is 224x224 and
    its position comes from two uniform generators over (0.0, 1.0).
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=seed)
        self.input = ops.FileReader(file_root=image_dir)
        # Independent generators for the horizontal and vertical crop anchor.
        self.pos_rng_x = ops.random.Uniform(range=(0.0, 1.0))
        self.pos_rng_y = ops.random.Uniform(range=(0.0, 1.0))
        self.decode = ops.ImageDecoderCrop(
            device='cpu',
            output_type=types.RGB,
            crop=(224, 224),
        )

    def define_graph(self):
        """Wire reader -> fused decode/crop and return ``(images, labels)``."""
        encoded, labels = self.input()
        cropped = self.decode(
            encoded,
            crop_pos_x=self.pos_rng_x(),
            crop_pos_y=self.pos_rng_y(),
        )
        return (cropped, labels)

# Build the CPU decode+crop pipeline, run a single batch and display it.
pipe = ImageDecoderCropPipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipe.build()
images, _ = pipe.run()
show_images(images)

1-2 GPU

import numpy as np

class ExternalInputIterator(object):
    """Endless iterator producing constant crop anchors and crop shapes.

    Each step returns ``(pos, size)``: two lists of length ``batch_size``
    holding float32 arrays ``[0.4, 0.2]`` and ``[0.3, 0.5]`` respectively.

    Args:
        batch_size: Number of per-sample arrays generated on every step.
    """

    def __init__(self, batch_size):
        self.batch_size = batch_size
        # Initialise the counters here as well, so that stepping the object
        # before calling iter() on it does not fail with AttributeError.
        self.i = 0
        self.n = batch_size

    def __iter__(self):
        """Reset the step counter and return the iterator itself."""
        self.i = 0
        self.n = self.batch_size
        return self

    def __next__(self):
        """Return one batch of ``(pos, size)`` lists and advance the counter."""
        # A fresh array per sample, matching one-array-per-sample batches.
        pos = [np.asarray([0.4, 0.2], dtype=np.float32)
               for _ in range(self.batch_size)]
        size = [np.asarray([0.3, 0.5], dtype=np.float32)
                for _ in range(self.batch_size)]
        self.i = (self.i + 1) % self.n
        return (pos, size)

    # Python 2 style alias, kept for callers that invoke ``.next()`` directly.
    next = __next__

# Module-level iterator; the slice pipeline pulls one (pos, size) pair from
# it on every iteration.
eii = ExternalInputIterator(batch_size=batch_size)
pos_size_iter = iter(eii)

class ImageDecoderSlicePipeline(Pipeline):
    """Decode images with the 'mixed' backend and slice them while decoding.

    The slice anchor and shape are supplied from outside the graph through
    two ``ExternalSource`` operators, fed from the module-level
    ``pos_size_iter`` before every iteration. Files are read from the
    module-level ``image_dir``; the pipeline is seeded with the module-level
    ``seed``.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super(ImageDecoderSlicePipeline, self).__init__(
            batch_size, num_threads, device_id, seed=seed)

        self.input = ops.FileReader(file_root=image_dir)
        self.input_crop_pos = ops.ExternalSource()
        self.input_crop_size = ops.ExternalSource()
        # Not used by define_graph; kept so the attribute stays available.
        self.input_crop = ops.ExternalSource()
        self.decode = ops.ImageDecoderSlice(device='mixed', output_type=types.RGB)

    def define_graph(self):
        """Wire reader and external crop inputs into the slicing decoder."""
        jpegs, labels = self.input()
        # Stored on self because iter_setup needs these nodes for feed_input.
        self.crop_pos = self.input_crop_pos()
        self.crop_size = self.input_crop_size()
        images = self.decode(jpegs, self.crop_pos, self.crop_size)
        return (images, labels)

    def iter_setup(self):
        """Feed the next crop anchors and shapes to the external sources."""
        # Use the builtin next() instead of the Python 2 style .next() alias.
        crop_pos, crop_size = next(pos_size_iter)
        self.feed_input(self.crop_pos, crop_pos)
        self.feed_input(self.crop_size, crop_size)

# Build the mixed-backend slice pipeline and run one batch; the output is
# copied to the CPU before being displayed.
pipe = ImageDecoderSlicePipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipe.build()
images, _ = pipe.run()
show_images(images.as_cpu())

2 Color Space Conversion

2-1 CPU

class ColorCpuPipeline(Pipeline):
    """Decode images on the CPU and convert them to other color spaces.

    Files are read from the module-level ``image_dir``. The graph returns
    the decoded RGB image followed by its BGR, YCbCr and grayscale
    conversions, in that order.
    """

    def __init__(self, batch_size, num_threads, device_id):
        # super() must name this class; the pasted snippet referred to an
        # unrelated ``SimplePipeline``.
        super(ColorCpuPipeline, self).__init__(batch_size, num_threads, device_id, seed=12)
        self.input = ops.FileReader(file_root=image_dir)
        self.decode = ops.ImageDecoder(device='cpu', output_type=types.RGB)
        self.conversions = {}
        self.conversions['rgb2bgr'] = ops.ColorSpaceConversion(
            device='cpu', image_type=types.RGB, output_type=types.BGR)
        self.conversions['rgb2ycbcr'] = ops.ColorSpaceConversion(
            device='cpu', image_type=types.RGB, output_type=types.YCbCr)
        self.conversions['rgb2gray'] = ops.ColorSpaceConversion(
            device='cpu', image_type=types.RGB, output_type=types.GRAY)

    def define_graph(self):
        """Return ``[rgb, bgr, ycbcr, gray]`` graph outputs."""
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)
        # One output per conversion, listed explicitly so the graph does not
        # depend on an externally defined output count.
        converted = [
            self.conversions['rgb2bgr'](images),
            self.conversions['rgb2ycbcr'](images),
            self.conversions['rgb2gray'](images),
        ]
        # The pasted return statement was cut off after ``outputs +``.
        return [images] + converted

2-2 GPU

class ColorGpuPipeline(Pipeline):
    """Decode images with the 'mixed' backend and convert them on the GPU.

    Files are read from the module-level ``image_dir``. The graph returns
    the decoded RGB image followed by its BGR, YCbCr and grayscale
    conversions, in that order.
    """

    def __init__(self, batch_size, num_threads, device_id):
        # super() must name this class; the pasted snippet referred to an
        # unrelated ``SimplePipeline``.
        super(ColorGpuPipeline, self).__init__(batch_size, num_threads, device_id, seed=12)
        self.input = ops.FileReader(file_root=image_dir)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.conversions = {}
        self.conversions['rgb2bgr'] = ops.ColorSpaceConversion(
            device='gpu', image_type=types.RGB, output_type=types.BGR)
        self.conversions['rgb2ycbcr'] = ops.ColorSpaceConversion(
            device='gpu', image_type=types.RGB, output_type=types.YCbCr)
        self.conversions['rgb2gray'] = ops.ColorSpaceConversion(
            device='gpu', image_type=types.RGB, output_type=types.GRAY)

    def define_graph(self):
        """Return ``[rgb, bgr, ycbcr, gray]`` graph outputs."""
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)
        # One output per conversion, listed explicitly so the graph does not
        # depend on an externally defined output count.
        converted = [
            self.conversions['rgb2bgr'](images.gpu()),
            self.conversions['rgb2ycbcr'](images.gpu()),
            self.conversions['rgb2gray'](images.gpu()),
        ]
        # The pasted return statement was cut off after ``outputs +``.
        return [images.gpu()] + converted

3 BrightnessContrast

3-1 CPU

class BCCpuPipeline(Pipeline):
    """Apply ``BrightnessContrast`` on the CPU to decoded RGB images.

    The reader takes its ``file_root`` from the module-level
    ``image_filename``. The graph returns the decoded image together with
    its adjusted version.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        self.bc = ops.BrightnessContrast(
            device="cpu",
            brightness_shift=0.3,
            contrast=0.4,
            contrast_center=100,
        )

    def define_graph(self):
        """Return ``(original, adjusted)`` graph outputs; labels are dropped."""
        encoded, _ = self.input()
        original = self.decode(encoded)
        return original, self.bc(original)

# Build the CPU brightness/contrast pipeline and run one batch.
pipecpu = BCCpuPipeline(
    batch_size=batch_size,
    num_threads=1,
    device_id=0,
)
pipecpu.build()
cpu_output = pipecpu.run()

3-2 GPU

class BCGpuPipeline(Pipeline):
    """Apply ``BrightnessContrast`` on the GPU to mixed-decoded RGB images.

    The reader takes its ``file_root`` from the module-level
    ``image_filename``. The graph returns the decoded image together with
    its adjusted version.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.bc = ops.BrightnessContrast(
            device="gpu",
            contrast=1.5,
            brightness_shift=1,
            brightness=-1,
        )

    def define_graph(self):
        """Return ``(original, adjusted)`` graph outputs; labels are dropped."""
        encoded, _ = self.input()
        original = self.decode(encoded)
        return original, self.bc(original.gpu())

# Build the GPU brightness/contrast pipeline and run one batch.
pipegpu = BCGpuPipeline(
    batch_size=batch_size,
    num_threads=1,
    device_id=0,
)
pipegpu.build()
gpu_output = pipegpu.run()

4 HSV

4-1 CPU

class HsvCpuPipeline(Pipeline):
    """Apply the ``Hsv`` operator on the CPU to decoded RGB images.

    The reader takes its ``file_root`` from the module-level
    ``image_filename``. The graph returns the decoded image together with
    its HSV-adjusted version.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        self.hsv = ops.Hsv(device="cpu", hue=120, saturation=1, value=0.4)

    def define_graph(self):
        """Return ``(original, adjusted)`` graph outputs; labels are dropped."""
        read, _ = self.input()
        image = self.decode(read)
        converted = self.hsv(image)
        # The pasted snippet ended at ``return image,`` and so discarded the
        # adjusted image it had just computed.
        return image, converted

4-2 GPU

class HsvGpuPipeline(Pipeline):
    """Apply the ``Hsv`` operator on the GPU to mixed-decoded RGB images.

    The reader takes its ``file_root`` from the module-level
    ``image_filename``. The graph returns the decoded image together with
    its HSV-adjusted version.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.hsv = ops.Hsv(device="gpu", hue=120, saturation=2, value=1)

    def define_graph(self):
        """Return ``(original, adjusted)`` graph outputs; labels are dropped."""
        read, _ = self.input()
        image = self.decode(read)
        converted = self.hsv(image.gpu())
        # The pasted snippet ended at ``return image,`` and so discarded the
        # adjusted image it had just computed.
        return image, converted

5 Resize

Scaling modes

“default” - the dimensions which are specified, are scaled to the requested size; the missing extents are calculated by applying average scale of the provided extents - for 2D and one extent specified, this means that aspect ratio is preserved

“stretch” - the dimensions which are specified, are scaled to the requested size; the missing ones are not scaled at all

“not_larger” - the image is scaled so that no dimension exceeds the specified size; aspect ratio is preserved

“not_smaller” - the image is scaled so that no dimension is smaller than specified (additionally, the max_size argument may be used to limit upscaling of inputs with very high aspect ratios)

batch_size = 8

pipe = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=3, device_id=0)
with pipe:
    # Read from the Caffe database at ``db_folder`` and decode with the
    # 'mixed' backend.
    files, labels = dali.fn.caffe_reader(path=db_folder, random_shuffle=True, seed=1234)
    images = dali.fn.image_decoder(files, device="mixed")

    width = 200
    height = 100

    # One output per combination of requested extents and scaling mode.
    out = [
        # default mode
        dali.fn.resize(images, size=[height, width]),
        dali.fn.resize(images, resize_x=width),
        dali.fn.resize(images, resize_y=height),
        # "stretch"
        dali.fn.resize(images, size=[height, width], mode="stretch"),
        dali.fn.resize(images, resize_x=width, mode="stretch"),
        dali.fn.resize(images, resize_y=height, mode="stretch"),
        # "not_larger" / "not_smaller"
        dali.fn.resize(images, size=[height, width], mode="not_larger"),
        dali.fn.resize(images, size=[height, width], mode="not_smaller"),
        dali.fn.resize(images, size=[height, width], mode="not_smaller", max_size=[110, 200]),
    ]

    pipe.set_outputs(*out)

pipe.build()
pipe_out = pipe.run()

6 WarpAffine

NVIDIA DALI从入门到放弃之五:Image Processing_2d

def random_transform(index, dst_center=(200, 200), src_center=(200, 200)):
    """Build a random 2x3 affine matrix acting around the given centers.

    The matrix is the top two rows of ``T(src_center) @ M @ T(-dst_center)``
    in homogeneous coordinates, where ``M`` is the identity perturbed by
    four independent draws from ``np.random.uniform(-0.5, 0.5)``.

    Args:
        index: Sample index. Accepted so the function can be used as the
            per-sample callback of ``gen_transforms``; it is not used.
        dst_center: ``(x, y)`` point in output coordinates that is moved to
            the origin before ``M`` is applied. Defaults to ``(200, 200)``.
        src_center: ``(x, y)`` point in input coordinates that the origin is
            moved to after ``M`` is applied. Defaults to ``(200, 200)``.

    Returns:
        A ``(2, 3)`` NumPy array; ``dst_center`` is always mapped exactly
        onto ``src_center``.
    """
    dst_cx, dst_cy = dst_center
    src_cx, src_cy = src_center

    # This function uses homogeneous coordinates - hence, 3x3 matrices.

    # Translate output coordinates so that (dst_cx, dst_cy) is the origin.
    t1 = np.array([[1, 0, -dst_cx],
                   [0, 1, -dst_cy],
                   [0, 0, 1]])

    def u():
        return np.random.uniform(-0.5, 0.5)

    # Randomized affine transform: identity plus random scaling/distortion.
    m = np.array([
        [1 + u(), u(), 0],
        [u(), 1 + u(), 0],
        [0, 0, 1]])

    # Translate the origin to the input center (src_cx, src_cy).
    t2 = np.array([[1, 0, src_cx],
                   [0, 1, src_cy],
                   [0, 0, 1]])

    # Combine the transforms.
    m = np.matmul(t2, np.matmul(m, t1))

    # Drop the last row; it is not used by an affine transform.
    return m[0:2, 0:3]


def gen_transforms(batch_size, single_transform_fn, shape=(2, 3)):
    """Stack per-sample transform matrices into one float32 batch array.

    Args:
        batch_size: Number of matrices to generate.
        single_transform_fn: Callable invoked as ``single_transform_fn(i)``
            for ``i`` in ``range(batch_size)``; its result must be
            assignable to an array of shape ``shape``.
        shape: Shape of a single matrix. Defaults to ``(2, 3)``; pass
            ``(3, 4)`` for matrices of that size.

    Returns:
        A float32 NumPy array of shape ``(batch_size, *shape)``.
    """
    out = np.zeros([batch_size, *shape])
    for i in range(batch_size):
        out[i] = single_transform_fn(i)
    return out.astype(np.float32)

# Seed NumPy's global RNG so the np.random draws that follow are repeatable.
np.random.seed(123)

class ExamplePipeline(Pipeline):
    """Warp decoded images with affine matrices supplied via ExternalSource.

    Images come from the Caffe LMDB container at the module-level
    ``db_folder`` and are decoded on the CPU. Three ``WarpAffine`` operators
    are applied: a GPU warp to 400x400, a CPU warp to 400x400 with a fill
    value, and a GPU warp that keeps the original canvas size.
    """

    def __init__(self, batch_size, num_threads, device_id, pipelined=True, exec_async=True):
        super().__init__(
            batch_size,
            num_threads,
            device_id,
            seed=12,
            exec_pipelined=pipelined,
            exec_async=exec_async,
        )

        # Reader for raw files stored in a Caffe LMDB container.
        self.input = ops.CaffeReader(path=db_folder, random_shuffle=True)
        # Decoder: raw file tensors in, images as 3D HWC tensors out.
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

        # The warp matrices are provided from outside the graph.
        self.transform_source = ops.ExternalSource()
        self.iter = 0

        # GPU warp with a fixed output size and linear interpolation.
        # `fill_value` is left unset, which results in source coordinate
        # clamping.
        self.warp_gpu = ops.WarpAffine(
            device="gpu",
            size=(400, 400),
            interp_type=types.INTERP_LINEAR,
        )

        # CPU warp with a fixed output size, nearest-neighbor interpolation
        # and an explicit fill value.
        self.warp_cpu = ops.WarpAffine(
            device="cpu",
            fill_value=200,
            size=(400, 400),
            interp_type=types.INTERP_NN,
        )

        # GPU warp without `size`: the original canvas size is kept.
        self.warp_keep_size = ops.WarpAffine(
            device="gpu",
            interp_type=types.INTERP_LINEAR,
        )

    def define_graph(self):
        """Tie the operators together and return the graph outputs.

        Returns:
            ``[labels, transform, original, gpu_warp, cpu_warp, keep_size_warp]``
        """
        self.transform = self.transform_source()
        self.jpegs, self.labels = self.input()
        decoded = self.decode(self.jpegs)

        original_gpu = decoded.gpu()
        # Transform parameters passed through GPU memory.
        warped_gpu = self.warp_gpu(decoded.gpu(), self.transform.gpu())
        # Transform passed through the named `matrix` input.
        warped_cpu = self.warp_cpu(decoded, matrix=self.transform).gpu()
        warped_same_canvas = self.warp_keep_size(decoded.gpu(), self.transform.gpu())

        return [
            self.labels,
            self.transform,
            original_gpu,
            warped_gpu,
            warped_cpu,
            warped_same_canvas,
        ]

    def iter_setup(self):
        """Generate the batch's transforms and feed them to ExternalSource."""
        matrices = gen_transforms(self.batch_size, random_transform)
        self.feed_input(self.transform, matrices)

# Build the warp-affine example pipeline with a batch of 32 and run it once.
batch_size = 32
pipe = ExamplePipeline(
    batch_size=batch_size,
    num_threads=2,
    device_id=0,
)
pipe.build()
pipe_out = pipe.run()

7 3D Transforms

pipe = Pipeline(
    batch_size=1,
    num_threads=3,
    device_id=0,
    exec_pipelined=True,
    exec_async=True,
)
with pipe:
    # Volumes are provided by ``GetData`` on the GPU, declared with layout "DHWC".
    data = fn.external_source(source=GetData, device="gpu", layout="DHWC")

    # "stretch" mode scales only the axis that is specified (Z here).
    resized = fn.resize(
        data,
        resize_z=224,
        mode="stretch",
        interp_type=types.INTERP_LANCZOS3,
    )

    # Random rotation angle and rotation axis.
    angle = fn.random.uniform(range=(-20, 20), seed=123)
    axis = fn.random.uniform(range=(-1, 1), shape=[3])
    rotated = fn.rotate(
        resized,
        angle=angle,
        axis=axis,
        interp_type=types.INTERP_LINEAR,
        fill_value=0,  # force out-of-bounds pixels to 0
    )

    # Fixed affine warp given as 12 coefficients (three rows of four).
    warped = fn.warp_affine(
        resized,
        size=(200, 320, 320),
        matrix=(
            1, 1, 0, -180,
            0, 1, 0.2, -20,
            0, 0, 1, 10,
        ),
        interp_type=types.INTERP_LINEAR,
        fill_value=0,  # force out-of-bounds pixels to 0
    )

    pipe.set_outputs(data, resized, rotated, warped, axis, angle)

pipe.build()
pipe_out = pipe.run()


举报

相关推荐

0 条评论