
Convolution Transpose by yourself

Beomjun Shin

February 28, 2018

Transcript

  1. import numpy as np
     import tensorflow as tf

     input = np.arange(1, 10, 1).reshape((1, 3, 3, 1))
     # values (shown as 3x3):
     # array([[1, 2, 3],
     #        [4, 5, 6],
     #        [7, 8, 9]])

     filter = np.arange(-1, -17, -1).reshape((4, 4, 1, 1))
     # values (shown as 4x4):
     # array([[ -1,  -2,  -3,  -4],
     #        [ -5,  -6,  -7,  -8],
     #        [ -9, -10, -11, -12],
     #        [-13, -14, -15, -16]])

  2. output = tf.nn.conv2d_transpose(
         value=input,
         filter=filter,
         output_shape=output_shape,
         strides=strides,
         padding=padding)

     output.reshape((6, 6))
     # array([[  -6.,  -17.,  -20.,  -29.,  -34.,  -21.],
     #        [ -18.,  -46.,  -58.,  -70.,  -86.,  -51.],
     #        [ -38.,  -94., -106., -134., -150.,  -87.],
     #        [ -54., -118., -142., -142., -170.,  -93.],
     #        [ -98., -214., -238., -254., -282., -153.],
     #        [ -70., -149., -164., -169., -186.,  -99.]], dtype=float32)

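The deck does not show the values of output_shape, strides and padding here. Judging from the 6x6 result and the zero-inserted input on the next slide, the call appears to use stride 2 with SAME padding; a minimal runnable sketch under that assumption (TF 1.x graph mode, matching the era of the deck):

     import numpy as np
     import tensorflow as tf  # assumes TensorFlow 1.x

     inp = np.arange(1, 10, 1).reshape((1, 3, 3, 1)).astype(np.float32)
     flt = np.arange(-1, -17, -1).reshape((4, 4, 1, 1)).astype(np.float32)

     # Assumed parameters: stride 2 and SAME padding reproduce the 6x6 output above.
     out = tf.nn.conv2d_transpose(
         value=inp,
         filter=flt,
         output_shape=(1, 6, 6, 1),
         strides=(1, 2, 2, 1),
         padding="SAME")

     with tf.Session() as sess:
         print(sess.run(out).reshape(6, 6))  # top-left element should be -6.
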
  3. # Padding & zero-inserted version of the input
     input_conv2d_version = np.array([
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 1, 0, 2, 0, 3, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 4, 0, 5, 0, 6, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 7, 0, 8, 0, 9, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
     ], dtype=np.float64)

     # The original filter rotated by 180 degrees
     filter_conv2d_version = np.array([
         [-16, -15, -14, -13],
         [-12, -11, -10,  -9],
         [ -8,  -7,  -6,  -5],
         [ -4,  -3,  -2,  -1],
     ], dtype=np.float64)

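Both arrays can also be derived from the original input and filter with plain NumPy: insert (stride - 1) zeros between the input elements, pad (for this particular example the padding works out to 2 on every side), and rotate the filter by 180 degrees. A small sketch (variable names are just for illustration):

     import numpy as np

     x = np.arange(1, 10).reshape(3, 3).astype(np.float64)
     w = np.arange(-1, -17, -1).reshape(4, 4).astype(np.float64)

     # Insert (stride - 1) zeros between input elements, then pad by 2 on every side.
     stride, pad = 2, 2
     dilated = np.zeros((2 * 3 - 1, 2 * 3 - 1))
     dilated[::stride, ::stride] = x
     input_conv2d_version = np.pad(dilated, pad, mode="constant")

     # Rotate the filter by 180 degrees (reverse both spatial axes).
     filter_conv2d_version = w[::-1, ::-1]
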
  4. output_conv2d_version = tf.nn.conv2d(
         input_conv2d_version,
         filter=filter_conv2d_version,
         strides=(1, 1, 1, 1),
         padding="VALID")

     output_conv2d_version.reshape((6, 6))
     # array([[  -6.,  -17.,  -20.,  -29.,  -34.,  -21.],
     #        [ -18.,  -46.,  -58.,  -70.,  -86.,  -51.],
     #        [ -38.,  -94., -106., -134., -150.,  -87.],
     #        [ -54., -118., -142., -142., -170.,  -93.],
     #        [ -98., -214., -238., -254., -282., -153.],
     #        [ -70., -149., -164., -169., -186.,  -99.]], dtype=float32)

     assert output == output_conv2d_version  # => True!!

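Putting slides 1-4 together as one self-contained script (TF 1.x assumed; the stride/padding values are the same inferred ones as above, shapes are expanded to NHWC, and np.allclose is used instead of == to compare the two results):

     import numpy as np
     import tensorflow as tf  # TF 1.x assumed

     x = np.arange(1, 10).reshape(1, 3, 3, 1).astype(np.float32)
     w = np.arange(-1, -17, -1).reshape(4, 4, 1, 1).astype(np.float32)

     # conv2d_transpose with (assumed) stride 2 and SAME padding.
     deconv = tf.nn.conv2d_transpose(
         x, w, output_shape=(1, 6, 6, 1), strides=(1, 2, 2, 1), padding="SAME")

     # Equivalent direct convolution: zero-inserted, padded input and rot-180 filter.
     dilated = np.zeros((5, 5), dtype=np.float32)
     dilated[::2, ::2] = x.reshape(3, 3)
     x2 = np.pad(dilated, 2, mode="constant").reshape(1, 9, 9, 1)
     w2 = w.reshape(4, 4)[::-1, ::-1].reshape(4, 4, 1, 1)
     conv = tf.nn.conv2d(x2, w2, strides=(1, 1, 1, 1), padding="VALID")

     with tf.Session() as sess:
         a, b = sess.run([deconv, conv])
     print(np.allclose(a, b))  # should print True
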
  5. tensorflow/python/ops/nn_ops.py

     def conv2d_transpose(
         value,
         filter,  # pylint: disable=redefined-builtin
         output_shape,
         strides,
         padding="SAME",
         data_format="NHWC",
         name=None):
       # (...)
       return gen_nn_ops.conv2d_backprop_input(
           input_sizes=output_shape_,
           filter=filter,
           out_backprop=value,
           strides=strides,
           padding=padding,
           data_format=data_format,
           name=name)

  6. Pro Tip - 1 : Compile First!

     • This source file (gen_nn_ops.py) is automatically generated by Bazel.
     • If you want to investigate TensorFlow at this level, compile it from source first.

  7. bazel-genfiles/tensorflow/python/ops/gen_nn_ops.py

     @tf_export('Conv2DBackpropInput')
     def conv2d_backprop_input(input_sizes, filter, out_backprop, strides,
                               padding, use_cudnn_on_gpu=True, data_format="NHWC",
                               dilations=[1, 1, 1, 1], name=None):
       # (skip...)
       else:
         _attr_T, _inputs_T = _execute.args_to_matching_eager(
             [filter, out_backprop], _ctx)
         (filter, out_backprop) = _inputs_T
         input_sizes = _ops.convert_to_tensor(input_sizes, _dtypes.int32)
         _inputs_flat = [input_sizes, filter, out_backprop]
         _attrs = ("T", _attr_T, "strides", strides, "use_cudnn_on_gpu",
                   use_cudnn_on_gpu, "padding", padding, "data_format", data_format,
                   "dilations", dilations)
         _result = _execute.execute(b"Conv2DBackpropInput", 1, inputs=_inputs_flat,
                                    attrs=_attrs, ctx=_ctx, name=name)
         _execute.record_gradient(
             "Conv2DBackpropInput", _inputs_flat, _attrs, _result, name)
         _result, = _result
         return _result

  8. Pro Tip - 2 : snake_case to CamelCase

     • For such generated ops, we can find the matching C++ implementation by
       searching for the CamelCase keyword.
     • e.g. conv2d_backprop_input -> Conv2DBackpropInput

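A throwaway illustration of the naming rule (a purely hypothetical helper, not part of TensorFlow). Note that naive word-by-word capitalisation yields Conv2dBackpropInput, while the registered op name capitalises the D, as in the @tf_export above:

     def snake_to_camel(name):
         # Naive conversion: capitalise each underscore-separated word.
         return "".join(word.capitalize() for word in name.split("_"))

     print(snake_to_camel("conv2d_backprop_input"))  # Conv2dBackpropInput
     # The registered op/kernel name is Conv2DBackpropInput, so search
     # case-insensitively (or grep for "BackpropInput") in the C++ sources.
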
  9. tensorflow/core/kernels/conv_grad_input_ops.cc

     template <typename T>
     struct LaunchConv2DBackpropInputOp<CPUDevice, T> {
       void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
                       const Tensor& out_backprop, const Tensor& filter,
                       int row_stride, int col_stride, const Padding& padding,
                       Tensor* in_backprop, TensorFormat data_format) {
         const CPUDevice& d = ctx->eigen_device<CPUDevice>();
         functor::SpatialConvolutionBackwardInput<CPUDevice, T>()(
             d, in_backprop->tensor<T, 4>(), filter.tensor<T, 4>(),
             out_backprop.tensor<T, 4>(), row_stride, col_stride);
       }
     };

     // (...skip)

     template <typename T>
     void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
         OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
         const Tensor& out_backprop, const Tensor& filter,
         int row_dilation, int col_dilation, int row_stride, int col_stride,
         const Padding& padding, Tensor* in_backprop, TensorFormat data_format) {
       using perftools::gputools::dnn::AlgorithmConfig;
       using perftools::gputools::dnn::AlgorithmDesc;
       using perftools::gputools::dnn::ProfileResult;
       // (...skip)
       // (...something related with cudnn)

  10. Pro Tip - 3 : We can only see cudnn.h

      • cuDNN is a closed-source, low-level library of deep learning primitives developed by NVIDIA.
      • The GPU implementation is therefore presumably very different from the CPU implementation.

  11. tensorflow/core/kernels/conv_2d.h (namespace functor)

      template <typename Device, typename T>
      struct SpatialConvolutionBackwardInput {
        void operator()(const Device& d,
                        typename TTypes<T, 4>::Tensor input_backward,
                        typename TTypes<T, 4>::ConstTensor kernel,
                        typename TTypes<T, 4>::ConstTensor output_backward,
                        int row_stride, int col_stride,
                        int row_dilation, int col_dilation) {
          // Need to swap row/col when calling Eigen.
          input_backward.device(d) = Eigen::SpatialConvolutionBackwardInput(
              kernel, output_backward, input_backward.dimension(2),
              input_backward.dimension(1), col_stride, row_stride,
              col_dilation, row_dilation);
        }
      };

  12. template <typename OutputBackward, typename Kernel>
      EIGEN_ALWAYS_INLINE static const typename internal::conditional<
          internal::traits<OutputBackward>::Layout == ColMajor,
          TensorReshapingOp<
              const DSizes<typename internal::traits<OutputBackward>::Index,
                           internal::traits<OutputBackward>::NumDimensions>,
              const TensorContractionOp<
                  const array<
                      IndexPair<typename internal::traits<OutputBackward>::Index>, 1>,
                  const Eigen::TensorForcedEvalOp<const TensorReshapingOp<
                      const DSizes<typename internal::traits<OutputBackward>::Index, 2>,
                      const TensorShufflingOp<
                          const array<
                              typename internal::traits<OutputBackward>::Index, 4>,
                          const TensorReverseOp<const ReverseColMajor, const Kernel> > > >,
                  const TensorReshapingOp<
                      const DSizes<typename internal::traits<OutputBackward>::Index, 2>,
                      const TensorImagePatchOp<Dynamic, Dynamic, const OutputBackward> > > >,
          TensorReshapingOp<
              const DSizes<typename internal::traits<OutputBackward>::Index,
                           internal::traits<OutputBackward>::NumDimensions>,
              const TensorContractionOp<
                  const array<
                      IndexPair<typename internal::traits<OutputBackward>::Index>, 1>,
                  const TensorReshapingOp<
                      const DSizes<typename internal::traits<OutputBackward>::Index, 2>,
                      const TensorImagePatchOp<Dynamic, Dynamic, const OutputBackward> >,
                  const Eigen::TensorForcedEvalOp<const TensorReshapingOp<
                      const DSizes<typename internal::traits<OutputBackward>::Index, 2>,
                      const TensorShufflingOp<
                          const array<
                              typename internal::traits<OutputBackward>::Index, 4>,
                          const TensorReverseOp<const ReverseRowMajor, const Kernel> > > > > > >::type
      SpatialConvolutionBackwardInput

  13. return choose(
          Cond<internal::traits<OutputBackward>::Layout == ColMajor>(),
          kernel.reverse(kernel_reverse)
              .shuffle(kernel_shuffle)
              .reshape(kernel_dims)
              .eval()
              .contract(
                  output_backward
                      .extract_image_patches(
                          kernelRows, kernelCols, 1, 1, row_in_stride, col_in_stride,
                          row_stride, col_stride, padding_top, padding_bottom,
                          padding_left, padding_right, OutScalar(0))
                      .reshape(pre_contract_dims),
                  contract_dims)
              .reshape(post_contract_dims),
          output_backward
              .extract_image_patches(kernelRows, kernelCols, 1, 1, row_in_stride,
                                     col_in_stride, row_stride, col_stride,
                                     padding_top, padding_bottom, padding_left,
                                     padding_right, OutScalar(0))
              .reshape(pre_contract_dims)
              .contract(kernel.reverse(kernel_reverse)
                            .shuffle(kernel_shuffle)
                            .reshape(kernel_dims)
                            .eval(),
                        contract_dims)
              .reshape(post_contract_dims));

  14. kernel.reverse(kernel_reverse)
          .shuffle(kernel_shuffle)
          .reshape(kernel_dims)
          .eval()
          .contract(
              output_backward
                  .extract_image_patches(
                      kernelRows, kernelCols, 1, 1, row_in_stride, col_in_stride,
                      row_stride, col_stride, padding_top, padding_bottom,
                      padding_left, padding_right, OutScalar(0))
                  .reshape(pre_contract_dims),
              contract_dims)
          .reshape(post_contract_dims)

  15. Eigen3: First look

      • Do some .reverse & .shuffle on the kernel.
      • Contract it with output_backward (which was originally our input).
      • Do extract_image_patches on output_backward.
      -> Great documentation

  16. Shuffle

      // Shuffle all dimensions to the left by 1.
      Tensor<float, 3> input(20, 30, 50);
      // ... set some values in input.
      Tensor<float, 3> output = input.shuffle({1, 2, 0});

      eigen_assert(output.dimension(0) == 30);
      eigen_assert(output.dimension(1) == 50);
      eigen_assert(output.dimension(2) == 20);

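For intuition, Eigen's shuffle corresponds to NumPy's transpose with a permutation of axes; the example above in NumPy terms:

      import numpy as np

      x = np.zeros((20, 30, 50))
      y = x.transpose(1, 2, 0)   # same permutation as input.shuffle({1, 2, 0})
      print(y.shape)             # (30, 50, 20)
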
  17. Reverse

      Eigen::Tensor<int, 2> a(4, 3);
      a.setValues({{0, 100, 200}, {300, 400, 500},
                   {600, 700, 800}, {900, 1000, 1100}});
      Eigen::array<bool, 2> reverse({true, false});
      Eigen::Tensor<int, 2> b = a.reverse(reverse);
      cout << "a" << endl << a << endl << "b" << endl << b << endl;

      =>
      a
         0  100  200
       300  400  500
       600  700  800
       900 1000 1100
      b
       900 1000 1100
       600  700  800
       300  400  500
         0  100  200

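The NumPy counterpart of reverse is np.flip along the chosen axes:

      import numpy as np

      a = np.array([[0, 100, 200], [300, 400, 500],
                    [600, 700, 800], [900, 1000, 1100]])
      b = np.flip(a, axis=0)   # reverse the first dimension, like reverse({true, false})
      print(b[0])              # [ 900 1000 1100]
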
  18. extract_image_patches

      • The returned tensor has one greater dimension than the input tensor,
        which is used to index each patch.
      • [batch, in_rows, in_cols, depth] ->
        [batch, out_rows, out_cols, ksize_rows * ksize_cols * depth]

      extract_image_patches(const Index patch_rows, const Index patch_cols,
                            const Index row_stride, const Index col_stride,
                            const PaddingType padding_type)

      extract_image_patches(
          kernelRows, kernelCols, 1, 1, row_in_stride, col_in_stride,
          row_stride, col_stride, padding_top, padding_bottom,
          padding_left, padding_right, OutScalar(0))

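TensorFlow exposes the same patch extraction as tf.extract_image_patches (TF 1.x name); a tiny sketch showing how a 3x3 image becomes a grid of flattened 2x2 patches:

      import numpy as np
      import tensorflow as tf  # TF 1.x assumed

      img = np.arange(1, 10).reshape(1, 3, 3, 1).astype(np.float32)

      # Extract all 2x2 patches with stride 1: output shape (1, 2, 2, 4),
      # i.e. each spatial position holds a flattened 2x2 patch.
      patches = tf.extract_image_patches(
          img, ksizes=[1, 2, 2, 1], strides=[1, 1, 1, 1],
          rates=[1, 1, 1, 1], padding="VALID")

      with tf.Session() as sess:
          print(sess.run(patches).reshape(4, 4))
          # [[1. 2. 4. 5.]
          #  [2. 3. 5. 6.]
          #  [4. 5. 7. 8.]
          #  [5. 6. 8. 9.]]
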
  19. Einstein Summation

      Einstein summation is a notational convention for simplifying expressions
      including summations of vectors, matrices, and general tensors.
      There are essentially three rules of Einstein summation notation, namely:

      1. Repeated indices are implicitly summed over.
      2. Each index can appear at most twice in any term.
      3. Each term must contain identical non-repeated indices.

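The same convention is what np.einsum implements, which makes it a handy way to sanity-check a contraction; for example, an ordinary matrix product with the repeated index k:

      import numpy as np

      A = np.arange(6).reshape(2, 3)
      B = np.arange(12).reshape(3, 4)

      # C_ij = sum_k A_ik * B_kj  -- the repeated index k is summed over.
      C = np.einsum("ik,kj->ij", A, B)
      print(np.allclose(C, A @ B))  # True
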
  20. Just Matrix Multiplication

      Eigen::Tensor<int, 2> A(3, 5);
      Eigen::Tensor<int, 2> B(3, 5);
      Eigen::array<Eigen::IndexPair<int>, 1> contraction_indices;
      // This will contract the first dimension of A with the first dim of B,
      // effectively computing At*B
      contraction_indices[0] = Eigen::IndexPair<int>(0, 0);
      Eigen::Tensor<int, 2> Result = A.contract(B, contraction_indices);

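The NumPy analogue makes the "A transposed times B" reading explicit:

      import numpy as np

      A = np.arange(15).reshape(3, 5)
      B = np.arange(15).reshape(3, 5)

      # Contract dimension 0 of A with dimension 0 of B -> same as A.T @ B.
      result = np.tensordot(A, B, axes=([0], [0]))
      print(np.allclose(result, A.T @ B))  # True
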
  21. conv2d_transpose is just implemented by a direct convolution

      • So my guess is that it consists of two parts:
        1. Rotate the kernel (reverse, shuffle)
        2. Contract (= matmul) it with the image that has zeros inserted
           (<- [row/col]_in_stride)

      kernel.reverse(kernel_reverse)
          .shuffle(kernel_shuffle)
          .reshape(kernel_dims)
          .eval()
          .contract(
              output_backward
                  .extract_image_patches(
                      kernelRows, kernelCols, 1, 1, row_in_stride, col_in_stride,
                      row_stride, col_stride, padding_top, padding_bottom,
                      padding_left, padding_right, OutScalar(0))
                  .reshape(pre_contract_dims),
              contract_dims)
          .reshape(post_contract_dims)

  22. Example Conv2d : Im2Col

      • Even in the 4-D tensor case, we can focus on just the 2-D image part;
        the im2col idea below applies to that case as well.

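The im2col figure from the slide is not in the transcript, but the idea is easy to sketch with NumPy: collect every k x k patch into a row, and the convolution collapses into a single matrix multiplication (a minimal 2-D, single-channel sketch; names are illustrative):

      import numpy as np

      def im2col(img, k):
          """Stack every k x k patch of a 2-D image as a row of a matrix."""
          h, w = img.shape
          rows = []
          for i in range(h - k + 1):
              for j in range(w - k + 1):
                  rows.append(img[i:i + k, j:j + k].ravel())
          return np.array(rows)                      # shape: (out_h * out_w, k * k)

      img = np.arange(1, 26, dtype=np.float64).reshape(5, 5)
      kernel = np.arange(1, 10, dtype=np.float64).reshape(3, 3)

      cols = im2col(img, 3)                          # (9, 9)
      out = (cols @ kernel.ravel()).reshape(3, 3)    # conv (cross-correlation, as in
                                                     # tf.nn.conv2d) as one matmul
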
  23. We can also think of convolution transpose as a direct convolution over an
      input with inserted zeros and extra padding.

  24. Why can we think of it as an inverse operation of conv2d?

      • Input <-> Output
      • by Conv2D <-> by Conv2D (Fractionally Strided Convolution)