Source code for tf_ops.general

""" A collection of helper tf functions.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import math
from tensorflow.python.layers import convolutional, normalization
from tensorflow.python.ops import init_ops
from tf_ops.wave_ops import lazy_wavelet, lazy_wavelet_inv


def build_optimizer(opt_method, lr, loss, max_gradient_norm=None,
                    global_step=None, decay_steps=None):
    """ Build an optimizer and return a training op.

    Will also add checks for Nans in the gradient, and add some monitoring to
    tensorboard.

    Parameters
    ----------
    opt_method : str
        Either 'adam', 'sgd', or 'momentum'
    lr : float
        Learning rate for the optimizer
    loss : tf.Tensor
        Tensor containing the loss operation
    max_gradient_norm : float or None
        What the gradients should be clipped to. If None, no clipping used.
    global_step : tf.Variable or None
        Variable holding the global step
    decay_steps : int
        For sgd only. After how many steps to decay the learning rate.

    Returns
    -------
    train_op : tf op
        An op that can be run in a session to apply gradients to the trainable
        variables.
    """

    with tf.variable_scope('optimizer'):
        # Build the optimizer
        if opt_method == 'adam':
            print('Optimizing with Adam')
            opt = tf.train.AdamOptimizer(lr, epsilon=1e-6)
        elif opt_method == 'momentum':
            print('Optimizing with momentum')
            opt = tf.train.MomentumOptimizer(lr,
                                             momentum=0.9,
                                             use_nesterov=True)
        elif opt_method == 'sgd':
            # Decay the learning rate exponentially based on the number of steps
            if decay_steps is not None:
                lr = tf.train.exponential_decay(lr,
                                                global_step,
                                                decay_steps=500*100,
                                                decay_rate=0.1,
                                                staircase=True)
            opt = tf.train.GradientDescentOptimizer(lr)
            tf.summary.scalar('learning_rate', lr)

        params = tf.trainable_variables()
        gradients = tf.gradients(loss, params)

        # Collect the gradients and clip them
        if max_gradient_norm is not None:
            gradients, norm = tf.clip_by_global_norm(
                gradients, max_gradient_norm)
        else:
            norm = tf.sqrt(tf.reduce_sum(
                [tf.reduce_sum(g**2) for g in gradients if g is not None]))

        # Add checks on the gradients for Nans
        grad_check = [tf.check_numerics(g, '{} gradient nan'.format(p.name)) for
                      g, p in zip(gradients, params) if g is not None]

        grad_check.append(tf.check_numerics(norm, 'global clip val nan'))

        # Ensure the gradient check is done before applying the gradients
        with tf.control_dependencies(grad_check):
            train_op = opt.apply_gradients(
                zip(gradients, params), global_step=global_step)

    # Function to make a nicer name for display
    def strip_name(name):
        name = name.split(':')[0].split('/')
        if 'fwd' in name:
            name.remove('fwd')
        if 'batch_normalization' in name:
            name.remove('batch_normalization')
        return '/'.join(name)

    # Add some tensorboard logging
    with tf.variable_scope('grads'):
        tf.summary.scalar('all', norm)
        [tf.summary.scalar('{}'.format(strip_name(p.name)), tf.norm(g))
         for g, p in zip(gradients, params) if g is not None]

        # Add histograms for gradients.
        [tf.summary.histogram('{}'.format(strip_name(p.name)), g)
         for g, p in zip(gradients, params) if g is not None]

    return train_op


[docs]def variable_with_wd(name, shape, stddev=None, wd=None, norm=2):
    """ Helper to create an initialized variable with weight decay.

    Note that the variable is initialized with a truncated normal distribution.
    A weight decay is added only if one is specified. Also will add summaries
    for this variable.

    Internally, it calls tf.get_variable, so you can use this to re-get already
    defined variables (so long as the reuse scope is set to true). If it
    re-fetches an already existing variable, it will not add regularization
    again.

    Parameters
    ----------
    name: str
        name of the variable
    shape: list of ints
        shape of the variable you want to create
    stddev: positive float or None
        standard deviation of a truncated Gaussian
    wd: positive float or None
        add L2Loss weight decay multiplied by this float. If None, weight
        decay is not added for this variable.
    norm: positive float
        Which regularizer to apply. E.g. norm=2 uses L2 regularization, and
        norm=p adds :math:`wd \\times ||w||_{p}^{p}` to the
        REGULARIZATION_LOSSES. See :py:func:`real_reg`.

    Returns
    -------
    out : variable tensor
    """
    if stddev is None:
        stddev = get_xavier_stddev(shape, uniform=False)
    initializer = tf.truncated_normal_initializer(stddev=stddev)

    var_before = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    var = tf.get_variable(name, shape, dtype=tf.float32,
                          initializer=initializer)
    var_after = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

    if len(var_before) != len(var_after):
        reg_loss = complex_reg(var, wd, norm)
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, reg_loss)
        variable_summaries(var, name)

    return var


[docs]def variable_summaries(var, name='summaries'):
    """Attach a lot of summaries to a variable (for TensorBoard visualization).

    Parameters
    ----------
    var : :py:class:`tf.Tensor`
        variable for which you wish to create summaries
    name : str
        scope under which you want to add your summary ops
    """
    with tf.name_scope(name + '_summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)), name='stddev')
        tf.summary.scalar('standard_deviation', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)


[docs]def loss(labels, logits, one_hot=True, num_classes=None, λ=1):
    """ Compute sum of data + regularization losses.

    loss = data_loss + λ * reg_losses

    The regularization loss will sum over all the variables that already
    exist in the GraphKeys.REGULARIZATION_LOSSES.

    Parameters
    ----------
    labels : ndarray(dtype=float, ndim=(N,C))
        The vector of labels.
    one_hot : bool
        True if the labels input is one_hot.
    num_classes : int
        Needed if the labels aren't one-hot already.
    logits : tf.Variable
        Logit outputs from the neural net.
    λ : float
        Multiplier to use on all regularization losses. Be careful not
        to apply things twice, as all the functions in this module typically set
        regularization losses at a block level (for more fine control).
        For this reason it defaults to 1, but can be useful to set to some other
        value to get quick scaling of loss terms.

    Returns
    -------
    losses : tuple of (loss, data_loss, reg_loss)
        For optimization, only need to use the first element in the tuple. I
        return the other two for displaying purposes.
    """
    with tf.variable_scope('data_loss'):
        tf.summary.histogram('logits', logits)
        tf.summary.histogram('softmax', tf.nn.softmax(logits))
        if not one_hot:
            labels = tf.one_hot(labels, depth=num_classes, axis=-1)

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)
        data_loss = tf.reduce_mean(cross_entropy, name='cross_entropy')
    with tf.variable_scope('reg_loss'):
        reg_variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        reg_term = tf.reduce_sum(reg_variables)

    with tf.variable_scope('loss'):
        loss = data_loss + λ*reg_term

    return loss, data_loss, reg_term


def fixed_padding(inputs, kernel_size, data_format):
    """Pads the input along the spatial dimensions independently of input size.

    Parameters
    ----------
    inputs: tf.Tensor
        A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on data_format.
    kernel_size: int
        The kernel to be used in the conv2d or max_pool2d operation.
        Should be a positive integer.
    data_format: str
        The input format ('channels_last' or 'channels_first').

    Returns
    -------
    y : tf.Tensor
        A tensor with the same format as the input with the data either intact
        (if kernel_size == 1) or padded (if kernel_size > 1).
    """

    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg

    if data_format == 'channels_first':
        padded_inputs = tf.pad(inputs, [[0, 0], [0, 0],
                               [pad_beg, pad_end], [pad_beg, pad_end]])
    else:
        padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                               [pad_beg, pad_end], [0, 0]])
    return padded_inputs


def _residual_core(x, filters, kernel_size=3, stride=1, train=True, wd=0.0,
                   bn_momentum=0.99, bn_epsilon=0.001):
    """ Core function of a residual unit.

    In -> conv -> bn -> relu -> conv

    Note that the normal residual layer has a batch norm and relu before the
    first conv. This is in the residual function which calls this.

    Parameters
    ----------
    x : tf tensor
        Input to be modified
    filters : int
        Number of output filters (will be used for all convolutions in the
        resnet core).
    kernel_size : int
        Size of the filter kernels
    stride : int
        Conv stride
    train : bool or tf boolean tensor
        Whether we are in the train phase or not. Can set to a tensorflow tensor
        so that it can be modified on the fly.
    wd : float
        Weight decay term for the convolutional weights
    bn_momentum : float
        The momentum for the batch normalization layers in the resnet
    bn_epsilon : float
        The epsilon for the batch normalization layers in the resnet
    """

    init = init_ops.VarianceScaling(scale=1.0, mode='fan_out')
    reg = lambda w: real_reg(w, wd, norm=2)
    bn_class = lambda name: normalization.BatchNormalization(
        name=name, momentum=bn_momentum, epsilon=bn_epsilon)
    conv_class = lambda name, stride: convolutional.Conv2D(
        filters, 3, (stride, stride), use_bias=False,
        padding=('SAME' if stride == 1 else 'VALID'),
        kernel_initializer=init, kernel_regularizer=reg, name=name)

    with tf.variable_scope('sub1'):
        # As we will do downsampling with strides, need to make sure the output
        # size is the correct format.
        if stride > 1:
            x = fixed_padding(x, kernel_size, 'channels_last')

        conv = conv_class('conv1', stride)
        x = conv.apply(x)

    with tf.variable_scope('sub2'):
        bn = bn_class('between_bn')
        x = bn.apply(x, training=train)
        x = tf.nn.relu(x)
        conv = conv_class('conv2', 1)
        x = conv.apply(x)

    return x


[docs]def residual(x, filters, kernel_size=3, stride=1, train=True, wd=0.0,
             bn_momentum=0.99, bn_epsilon=0.001, name='res'):
    """ Residual layer

    Uses the _residual_core function to create F(x), then adds x to it.

    Parameters
    ----------
    x : tf tensor
        Input to be modified
    filters : int
        Number of output filters (will be used for all convolutions in the
        resnet core).
    stride : int
        Conv stride
    train : bool or tf boolean tensor
        Whether we are in the train phase or not. Can set to a tensorflow tensor
        so that it can be modified on the fly.
    wd : float
        Weight decay term for the convolutional weights
    bn_momentum : float
        The momentum for the batch normalization layers in the resnet
    bn_epsilon : float
        The epsilon for the batch normalization layers in the resnet

    Notes
    -----
    When training, the moving_mean and moving_variance need to be updated. By
    default the update ops are placed in tf.GraphKeys.UPDATE_OPS, so they need
    to be added as a dependency to the train_op. For example::

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss)
    """
    bn_class = lambda name: normalization.BatchNormalization(
        name=name, momentum=bn_momentum, epsilon=bn_epsilon)

    orig_x = x
    with tf.variable_scope(name):
        bn = bn_class('init_bn')
        x = bn.apply(x, training=train)
        x = tf.nn.relu(x)

        # The projection shortcut should come after the first batch norm and
        # ReLU since it performs a 1x1 convolution.
        if stride > 1:
            orig_x = tf.layers.conv2d(
                orig_x, filters=filters, strides=stride,
                kernel_size=1, padding='VALID', use_bias=False,
                kernel_initializer=tf.variance_scaling_initializer(),
                data_format='channels_last')

        x = _residual_core(x, filters, kernel_size, stride, train, wd,
                           bn_momentum, bn_epsilon)

        y = tf.add(x, orig_x)

    return y


def lift_residual_resample(x1, x2, filters, train=True, downsize=True,
                           wd=0.0001):
    """Define a Lifting Layer with resizing

    The P and the U blocks for this lifting layer are non-linear functions.
    These are the same form as the F(x) in a residual layer (i.e.  two
    convolutions). In block form, a lifting layer looks like this::

             _______________
            |               |
        x1->|---(+)---------|->d
            |    ^      |   |
            |    |      |   |
            |   ---    ---  |
            |  |-P |  | U | |
            |   ---    ---  |
            |    |      |   |
            |    |      v   |
        x2->|----------(+)--|->s
            |_______________|

    Parameters
    ----------
    x1 : tf tensor
        Input tensor 1
    x2 : tf tensor
        Input tensor 2
    filters : int
        Number of output channels for P*x2 and U*d
    train : bool or tf boolean tensor
        Whether we are in the train phase or not. Can set to a tensorflow tensor
        so that it can be modified on the fly.
    wd : float
        Weight decay term for the convolutional weights

    Returns
    -------
    d : tf tensor
        Detail coefficients
    s : tf tensor
        Scale coefficients
    """
    assert x1.get_shape().as_list() == x2.get_shape().as_list()

    # Split the two inputs
    if downsize:
        x1 = lazy_wavelet(x1)
        x2 = lazy_wavelet(x2)

    in_channels = x1.get_shape().as_list()[-1]
    if (filters % in_channels != 0):
        raise ValueError('Can only expand an image by an integer number ' +
                         'of its channels')

    # If the requested number of output channels is larger than the input,
    # stack the input to make them match.
    if filters != in_channels:
        # We've already checked that the remainder is 0 so can do integer div
        nstack = filters // in_channels
        x1 = tf.concat([x1]*nstack, axis=-1)

    d, s = lift_residual(x1, x2, train, wd)

    return d, s


def lift_residual_resample_inv(d, s, out_size, train=True, wd=0.0001):
    """Define a inverse Lifting Layer with resizing

    The P and the U blocks for this lifting layer are non-linear functions.
    These are the same form as the F(x) in a residual layer (i.e.  two
    convolutions). In block form, a lifting layer looks like this::

    We share the variables with the forward lifting.

    In block form, the inverse lifting layer looks like this (note the sign swap
    and flow direction reversal compared to the forward case)::

             _______________
            |               |
        x1<-|---(+)---------|<-d
            |    ^      |   |
            |    |      |   |
            |   ---    ---  |
            |  | P |  |-U | |
            |   ---    ---  |
            |    |      |   |
            |    |      v   |
        x2<-|----------(+)--|<-s
            |_______________|

    Parameters
    ----------
    d : tf tensor
        Input tensor 1
    s : tf tensor
        Input tensor 2
    out_size : list of ints
        Size of the resulting x1 tensors.
    train : bool or tf boolean tensor
        Whether we are in the train phase or not. Can set to a tensorflow tensor
        so that it can be modified on the fly.
    wd : float
        Weight decay term for the convolutional weights

    Returns
    -------
    x1 : tf tensor
        Reconstructed x1
    x2 : tf tensor
        Reconstructed x2
    """
    assert d.get_shape().as_list() == s.get_shape().as_list()
    x1, x2 = lift_residual_inv(d, s, train, wd)

    # Recombine the two outputs
    if out_size[-1] != x1.get_shape().as_list()[-1]:
        x1 = lazy_wavelet_inv(x1, out_size)
        x2 = lazy_wavelet_inv(x2, out_size)

    return x1, x2


[docs]def lift_residual(x1, x2, train=True, wd=0.0001):
    """Define a Lifting Layer

    The P and the U blocks for this lifting layer are non-linear functions.
    These are the same form as the F(x) in a residual layer (i.e.  two
    convolutions). In block form, a lifting layer looks like this::

             _______________
            |               |
        x1->|---(+)---------|->d
            |    ^      |   |
            |    |      |   |
            |   ---    ---  |
            |  |-P |  | U | |
            |   ---    ---  |
            |    |      |   |
            |    |      v   |
        x2->|----------(+)--|->s
            |_______________|

    Parameters
    ----------
    x1 : tf tensor
        Input tensor 1
    x2 : tf tensor
        Input tensor 2
    train : bool or tf boolean tensor
        Whether we are in the train phase or not. Can set to a tensorflow tensor
        so that it can be modified on the fly.
    wd : float
        Weight decay term for the convolutional weights

    Returns
    -------
    d : tf tensor
        Detail coefficients
    s : tf tensor
        Scale coefficients
    """
    filters = x1.get_shape().as_list()[-1]
    assert filters == x2.get_shape().as_list()[-1]
    with tf.variable_scope('P'):
        # Calculate d = x1 - Px2
        d = x1 - _residual_core(x2, filters, 1, train, wd)

    with tf.variable_scope('U'):
        # Calculate s = x2 + Ud
        s = x2 + _residual_core(d, filters, 1, train, wd)

    return d, s


[docs]def lift_residual_inv(d, s, train=True, wd=0.0001):
    """Define the inverse of a lifting layer

    We share the variables with the forward lifting.

    In block form, the inverse lifting layer looks like this (note the sign swap
    and flow direction reversal compared to the forward case)::

             _______________
            |               |
        x1<-|---(+)---------|<-d
            |    ^      |   |
            |    |      |   |
            |   ---    ---  |
            |  | P |  |-U | |
            |   ---    ---  |
            |    |      |   |
            |    |      v   |
        x2<-|----------(+)--|<-s
            |_______________|


    Parameters
    ----------
    d : tf tensor
        Input tensor 1
    s : tf tensor
        Input tensor 2
    filters : int
        Number of output channels for Px2 and Ud
    train : bool or tf boolean tensor
        Whether we are in the train phase or not. Can set to a tensorflow tensor
        so that it can be modified on the fly.
    wd : float
        Weight decay term for the convolutional weights

    Returns
    -------
    x1 : tf tensor
        Reconstructed x1
    x2 : tf tensor
        Reconstructed x2
    """
    filters = d.get_shape().as_list()[-1]
    assert filters == s.get_shape().as_list()[-1]
    with tf.variable_scope('U') as scope:
        # Calculate x2 = s - Ud
        scope.reuse_variables()
        x2 = s - _residual_core(d, filters, 1, train, wd)

    with tf.variable_scope('P') as scope:
        # Calculate x_e = d + Px2
        scope.reuse_variables()
        x1 = d + _residual_core(x2, filters, 1, train, wd)

    return x1, x2


[docs]def complex_convolution(x, output_dim, size=3, stride=1, stddev=None,
                        wd=0.0, norm=1.0, name='conv2d', with_bias=False,
                        bias_start=0.0):
    """Function to do complex convolution

    In a similar way we have a convenience function, :py:func:`convolution` to
    wrap tf.nn.conv2d (create variables, add a relu, etc.), this function wraps
    :py:func:`cconv2d`. If you want more fine control over things, use
    cconv2d directly, but for most purposes, this function should do
    what you need. Adds the variables to tf.GraphKeys.REGULARIZATION_LOSSES
    if the wd parameter is positive.

    Parameters
    ----------
    x : :py:class:`tf.Tensor`
        The input variable
    output_dim : int
        number of filters to have
    size : int
        kernel spatial support
    stride : int
        what stride to use for convolution
    stddev : None or positive float
        Initialization stddev. If set to None, will use
        :py:func:`get_xavier_stddev`
    wd : None or positive float
        What weight decay to use
    norm : positive float
        Which regularizer to apply. E.g. norm=2 uses L2 regularization, and
        norm=p adds :math:`wd \\times ||w||_{p}^{p}` to the
        REGULARIZATION_LOSSES. See :py:func:`real_reg`.
    name : str
        The tensorflow variable scope to create the variables under
    with_bias : bool
        add a bias after convolution? (this will be ignored if batch norm is
        used)
    bias_start : complex float
        If a bias is used, what to initialize it to.

    Returns
    -------
    y : :py:class:`tf.Tensor`
        Result of applying complex convolution to x
    """

    varlist = []
    with tf.variable_scope(name):
        # Define the real and imaginary components of the weights
        w_shape = [size, size, x.get_shape().as_list()[-1], output_dim]
        w_r = variable_with_wd('w_real', w_shape, stddev, wd, norm)
        w_i = variable_with_wd('w_imag', w_shape, stddev, wd, norm)
        w = tf.complex(w_r, w_i)
        varlist.append(w)

        y = cconv2d(x, w, strides=[1, stride, stride, 1], name=name)
        y_r, y_i = tf.real(y), tf.imag(y)

        if with_bias:
            init = tf.constant_initializer(bias_start)
            b_r = tf.get_variable('b_real', [output_dim], initializer=init)
            b_i = tf.get_variable('b_imag', [output_dim], initializer=init)
            varlist.append(tf.complex(b_r, b_i))
            y_r = tf.add(y_r, b_r)
            y_i = tf.add(y_i, b_i)

    y = tf.complex(y_r, y_i)

    # Return the results
    return y


[docs]def complex_convolution_transpose(x, output_dim, shape, size=3, stride=1,
                                  stddev=None, wd=0.0, norm=1, name='conv2d'):
    """Function to do the conjugate transpose of complex convolution

    In a similar way we have a convenience function, :py:func:`convolution` to
    wrap tf.nn.conv2d (create variables, add a relu, etc.), this function wraps
    :py:func:`cconv2d_transpose`. If you want more fine control over things, use
    cconv2d_transpose directly, but for most purposes, this function should do
    what you need. Adds the variables to tf.GraphKeys.REGULARIZATION_LOSSES
    if the wd parameter is positive.

    We do not subtract the bias after doing the transpose convolution.

    Parameters
    ----------
    x : :py:class:`tf.Tensor`
        The input variable
    output_dim : int
        number of filters to have
    output_shape : list-like or 1-d Tensor
        list/tensor representing the output shape of the deconvolution op
    size : int
        kernel spatial support
    stride : int
        what stride to use for convolution
    stddev : None or positive float
        Initialization stddev. If set to None, will use
        :py:func:`get_xavier_stddev`
    wd : None or positive float
        What weight decay to use
    norm : positive float
        Which regularizer to apply. E.g. norm=2 uses L2 regularization, and
        norm=p adds :math:`wd \\times ||w||_{p}^{p}` to the
        REGULARIZATION_LOSSES. See :py:func:`real_reg`.
    name : str
        The tensorflow variable scope to create the variables under

    Returns
    -------
    y : :py:class:`tf.Tensor`
        Result of applying complex convolution transpose to x
    """

    varlist = []
    with tf.variable_scope(name):
        # Define the real and imaginary components of the weights
        w_shape = [size, size, x.get_shape().as_list()[-1], output_dim]
        w_r = variable_with_wd('w_real', w_shape, stddev, wd, norm)
        w_i = variable_with_wd('w_imag', w_shape, stddev, wd, norm)
        w = tf.complex(w_r, w_i)
        varlist.append(w)

        y = cconv2d_transpose(
            x, w, output_dim, strides=[1, stride, stride, 1], name=name)
        y_r, y_i = tf.real(y), tf.imag(y)

    y = tf.complex(y_r, y_i)

    # Return the results
    return y


[docs]def cconv2d(x, w, **kwargs):
    """ Performs convolution with complex inputs and weights

    Need to create the weights and feed to this function. If you want to have
    this done for you automatically, use :py:func:`complex_convolution`.

    Parameters
    ----------
    x : tf tensor
        input tensor
    w : tf tensor
        weights tensor
    kwargs : (key, val) pairs
        Same as tf.nn.conv2d

    Returns
    -------
    y : :py:class:`tf.Tensor`
        Result of applying convolution to x

    Notes
    -----
    Uses tf.nn.conv2d which I believe is actually cross-correlation.
    """
    default_args = {
        'strides': [1, 1, 1, 1],
        'padding': 'SAME',
        'data_format': "NHWC",
        'name': None
    }
    for key, val in kwargs.items():
        if key not in default_args.keys():
            raise KeyError(
                'Unknown argument {} for function tf.nn.conv2d'.format(key))
        else:
            default_args[key] = val

    x = tf.cast(x, tf.complex64)
    w = tf.cast(w, tf.complex64)
    x_r = tf.real(x)
    x_i = tf.imag(x)
    w_r = tf.real(w)
    w_i = tf.imag(w)
    conv = lambda x, w: tf.nn.conv2d(x, w, **default_args)
    y_r = conv(x_r, w_r) - conv(x_i, w_i)
    y_i = conv(x_i, w_r) + conv(x_r, w_i)

    return tf.complex(y_r, y_i)


[docs]def cconv2d_transpose(y, w, output_shape, **kwargs):
    """ Performs transpose convolution with complex outputs and weights.

    Need to create the weights and feed to this function. If you want to have
    this done for you automatically, use
    :py:func:`complex_convolution_transpose`.

    Parameters
    ----------
    x : tf tensor
        input tensor
    w : tf tensor
        weights tensor
    kwargs : (key, val) pairs
        Same as tf.nn.conv2d_transpose

    Notes
    -----
    Takes the complex conjugate of w before doing convolution. Uses
    tf.nn.conv2d_transpose which I believe is actually convolution.

    Returns
    -------
    y : :py:class:`tf.Tensor`
        Result of applying convolution to x
    """
    default_args = {
        'strides': [1, 1, 1, 1],
        'padding': 'SAME',
        'data_format': "NHWC",
        'name': None
    }
    for key, val in kwargs.items():
        if key not in default_args.keys():
            raise KeyError(
                'Unknown argument {} for function '.format(key) +
                'tf.nn.conv2d_transpose')
        else:
            default_args[key] = val

    y = tf.cast(y, tf.complex64)
    w = tf.cast(w, tf.complex64)
    y_r = tf.real(y)
    y_i = tf.imag(y)
    w_r = tf.real(w)
    w_i = -tf.imag(w)
    conv = lambda y, w: tf.nn.conv2d_transpose(
        y, w, output_shape, **default_args)
    x_r = conv(y_r, w_r) - conv(y_i, w_i)
    x_i = conv(y_i, w_r) + conv(y_r, w_i)
    x_r = tf.reshape(x_r, output_shape)
    x_i = tf.reshape(x_i, output_shape)

    return tf.complex(x_r, x_i)


[docs]def separable_conv_with_pad(x, h_row, h_col, stride=1):
    """ Function to do spatial separable convolution.

    The filter weights must already be defined. It will use symmetric extension
    before convolution.

    Parameters
    ----------
    x : :py:class:`tf.Tensor` of shape [Batch, height, width, c]
        The input variable. Should be of shape
    h_row : tf tensor of shape [1, l, c_in, c_out]
        The spatial row filter
    h_col : tf tensor of shape [l, 1, c_in, c_out]
        The column filter.
    stride : int
        What stride to use on the convolution.

    Returns
    -------
    y : :py:class:`tf.Tensor`
        Result of applying convolution to x
    """
    # Do the row filter first:
    if tf.is_numeric_tensor(h_row):
        h_size = h_row.get_shape().as_list()
    else:
        h_size = h_row.shape

    assert h_size[0] == 1
    pad = h_size[1] // 2
    if h_size[1] % 2 == 0:
        y = tf.pad(x, [[0, 0], [0, 0], [pad - 1, pad], [0, 0]], 'SYMMETRIC')
    else:
        y = tf.pad(x, [[0, 0], [0, 0], [pad, pad], [0, 0]], 'SYMMETRIC')
    y = tf.nn.conv2d(y, h_row, strides=[1, stride, stride, 1],
                     padding='VALID')

    # Now do the column filtering
    if tf.is_numeric_tensor(h_col):
        h_size = h_col.get_shape().as_list()
    else:
        h_size = h_col.shape

    assert h_size[1] == 1
    pad = h_size[0] // 2
    if h_size[0] % 2 == 0:
        y = tf.pad(y, [[0, 0], [pad - 1, pad], [0, 0], [0, 0]], 'SYMMETRIC')
    else:
        y = tf.pad(y, [[0, 0], [pad, pad], [0, 0], [0, 0]], 'SYMMETRIC')
    y = tf.nn.conv2d(y, h_col, strides=[1, stride, stride, 1],
                     padding='VALID')

    assert x.get_shape().as_list()[1:3] == y.get_shape().as_list()[1:3]

    return y


def _get_var_name(x):
    """ Find the name of the variable by stripping off the scopes

    Notes
    -----
    A typical name will be scope1/scope2/.../name/kernel:0.
    This function serves to split off the scopes and return kernel
    """
    split_colon = x.name.split(':')[0]
    slash_strs = split_colon.split('/')
    #  last_two = slash_strs[-2] + '/' + slash_strs[-1]
    last_one = slash_strs[-1]
    return last_one


def get_static_shape_dyn_batch(x):
    """Returns a tensor representing the static shape of x but keeping the batch
    unkown"""
    batch = tf.shape(x)[0]
    static = x.get_shape()
    return tf.concat([[batch], static[1:]], axis=0)


[docs]def get_xavier_stddev(shape, uniform=False, factor=1.0, mode='FAN_AVG'):
    """Get the correct stddev for a set of weights

    When initializing a deep network, it is in principle advantageous to keep
    the scale of the input variance constant, so it does not explode or diminish
    by reaching the final layer. This initializer use the following formula:

    .. code:: python

      if mode='FAN_IN': # Count only number of input connections.
          n = fan_in
      elif mode='FAN_OUT': # Count only number of output connections.
          n = fan_out
      elif mode='FAN_AVG': # Average number of inputs and output connections.
          n = (fan_in + fan_out)/2.0
          truncated_normal(shape, 0.0, stddev=sqrt(factor/n))

    * To get `Delving Deep into Rectifiers`__, use::

          factor=2.0
          mode='FAN_IN'
          uniform=False

      __ http://arxiv.org/pdf/1502.01852v1.pdf

    * To get `Convolutional Architecture for Fast Feature Embedding`__ , use::

          factor=1.0
          mode='FAN_IN'
          uniform=True

      __ http://arxiv.org/abs/1408.5093

    * To get `Understanding the difficulty of training deep feedforward neural
      networks`__ use::

          factor=1.0
          mode='FAN_AVG'
          uniform=True

      __ http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf

    * To get `xavier_initializer` use either::

        factor=1.0
        mode='FAN_AVG'
        uniform=True

      or::

        factor=1.0
        mode='FAN_AVG'
        uniform=False

    Parameters
    ----------
    factor: float
        A multiplicative factor.
    mode : str
        'FAN_IN', 'FAN_OUT', 'FAN_AVG'.
    uniform : bool
        Whether to use uniform or normal distributed random initialization.
    seed : int
        Used to create random seeds. See `tf.set_random_seed`__
        for behaviour.

        __ https://www.tensorflow.org/api_docs/python/tf/set_random_seed

    dtype : tf.dtype
        The data type. Only floating point types are supported.

    Returns
    -------
    out : float
        The stddev/limit to use that generates tensors with unit variance.

    Raises
    ------
    ValueError : if `dtype` is not a floating point type.
    TypeError : if `mode` is not in ['FAN_IN', 'FAN_OUT', 'FAN_AVG'].
    """
    if shape:
        fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
        fan_out = float(shape[-1])
    else:
        fan_in = 1.0
        fan_out = 1.0

    for dim in shape[:-2]:
        fan_in *= float(dim)
        fan_out *= float(dim)

    if mode == 'FAN_IN':
        # Count only number of input connections.
        n = fan_in
    elif mode == 'FAN_OUT':
        # Count only number of output connections.
        n = fan_out
    elif mode == 'FAN_AVG':
        # Average number of inputs and output connections.
        n = (fan_in + fan_out) / 2.0

    if uniform:
        # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
        limit = math.sqrt(3.0 * factor / n)
        return limit
        #  return random_ops.random_uniform(shape, -limit, limit,
                                         #  dtype, seed=seed)
    else:
        # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
        trunc_stddev = math.sqrt(1.3 * factor / n)
        return trunc_stddev
        #  return random_ops.truncated_normal(shape, 0.0, trunc_stddev, dtype,
                                             #  seed=seed)


[docs]def real_reg(w, wd=0.01, norm=2):
    """ Apply regularization on real weights

    norm can be any positive float. Of course the most commonly used values
    would be 2 and 1 (for L2 and L1 regularization), but you can experiment by
    making it some value in between. A value of p returns:

    .. math::

        wd \\times \\sum_{i} ||w_{i}||_{p}^{p}

    Parameters
    ----------
    w : :py:class:`tf.Tensor`
        The weights to regularize
    wd : positive float, optional (default=0.01)
        Regularization parameter
    norm : positive float, optional (default=2)
        The norm to use for regularization. E.g. set norm=1 for the L1 norm.

    Returns
    -------
    reg_loss : :py:class:`tf.Tensor`
        The loss. This method does not add anything to the REGULARIZATION_LOSSES
        collection. The calling function needs to do that.

    Raises
    ------
    ValueError : If norm is less than 0
    """
    if wd is None or wd == 0 or norm is None:
        return
    if norm <= 0:
        raise ValueError('Can only take positive norms, not {}'.format(norm))

    if norm == 2:
        # L2 Loss computes half of the sum of squares
        reg_loss = tf.nn.l2_loss(w)
    elif norm == 1:
        mag = tf.abs(w)
        reg_loss = tf.reduce_sum(mag)
    else:
        mag = tf.abs(w)
        reg_loss = (1/norm) * tf.reduce_sum(mag**norm)

    reg_loss = tf.multiply(reg_loss, wd, name='weight_loss')
    return reg_loss
    #  tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, reg_loss)


[docs]def complex_reg(w, wd=0.01, norm=1):
    """ Apply regularization on complex weights.

    norm can be any positive float. Of course the most commonly used values
    would be 2 and 1 (for L2 and L1 regularization), but you can experiment by
    making it some value in between. A value of p returns:

    .. math::

        wd \\times \\sum_{i} ||w_{i}||_{p}^{p}


    Parameters
    ----------
    w : :py:class:`tf.Tensor` (dtype=complex)
        The weights to regularize
    wd : positive float, optional (default=0.01)
        Regularization parameter
    norm : positive float, optional (default=1)
        The norm to use for regularization. E.g. set norm=1 for the L1 norm.

    Returns
    -------
    reg_loss : :py:class:`tf.Tensor`
        The loss. This method does not add anything to the REGULARIZATION_LOSSES
        collection. The calling function needs to do that.

    Raises
    ------
    ValueError : If norm is less than 0

    Notes
    -----
    Can call this function with real weights too, making it perhaps a better
    de-facto function to call, as it able to handle both cases.
    """
    if wd is None or wd == 0 or norm is None:
        return
    if norm <= 0:
        raise ValueError('Can only take positive norms, not {}'.format(norm))

    # Check the weights input. Use the real regularizer if weights are purely
    # real
    if w.dtype.is_floating:
        return real_reg(w, wd, norm)

    # L2 is a special regularization where we can regularize the real and
    # imaginary components independently. All other types we need to combine
    # them to get the magnitude.
    if norm == 2:
        # L2 Loss computes half of the sum of squares
        reg_loss = tf.nn.l2_loss(tf.real(w)) + tf.nn.l2_loss(tf.imag(w))
    elif norm == 1:
        mag = tf.abs(w)
        reg_loss = tf.reduce_sum(mag)
    else:
        mag = tf.abs(w)
        reg_loss = (1/norm) * tf.reduce_sum(mag**norm)

    reg_loss = tf.multiply(reg_loss, wd, name='weight_loss')
    return reg_loss
    #  tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, reg_loss)