fluid.clip

ErrorClipByValue

class paddle.fluid.clip.ErrorClipByValue(max, min=None)[source]

Clips tensor values to the range [min, max].

Given a tensor t (see the Examples below), this operation clips its values to the range [min, max] in place.

  • Any values less than min are set to min.

  • Any values greater than max are set to max.
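
The effect is the same elementwise clamp computed by numpy.clip. A minimal NumPy sketch of the rule above, with illustrative values (the array and bounds below are made up for illustration and are not part of this API):

import numpy as np

t = np.array([-3.0, -0.5, 0.0, 0.7, 4.2], dtype=np.float32)
# Clamp every element into [min, max] = [-1.0, 2.0].
print(np.clip(t, -1.0, 2.0))  # [-1.  -0.5  0.   0.7  2. ]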

Parameters
  • max (float) – The maximum value to clip by.

  • min (float, optional) – The minimum value to clip by. If not set by the user, it will be set to -max by the framework.

Examples

import paddle.fluid as fluid
BATCH_SIZE = 128
CLIP_MAX = 2e-6
CLIP_MIN = -1e-6
prog = fluid.framework.Program()
with fluid.program_guard(main_program=prog):
    image = fluid.layers.data(name='x', shape=[784], dtype='float32')
    hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
    hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
    predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
    label = fluid.layers.data(name='y', shape=[1], dtype='int64')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(cost)
prog_clip = prog.clone()
prog_clip.block(0).var(hidden1.name)._set_error_clip(
    fluid.clip.ErrorClipByValue(
        max=CLIP_MAX, min=CLIP_MIN))

GradientClipByGlobalNorm

class paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm, group_name='default_group')[source]

Clips the values of multiple tensors so that their global norm does not exceed a given threshold.

Given a list of tensors t_list and a clipping threshold clip_norm, an instance of this class is passed as the first argument of the set_gradient_clip method. The second argument of set_gradient_clip selects the parameters whose gradients are clipped to produce the clipped tensor list list_clipped; its default value is None, in which case the global norm global_norm is computed over all tensors in t_list.

To perform the clipping, the values \(t\_list[i]\) are set to:

\[t\_list[i] = t\_list[i] * \frac{clip\_norm}{\max(global\_norm, clip\_norm)}\]

where:

\[global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2}\]

If \(clip\_norm > global\_norm\) then the entries in t_list remain as they are, otherwise they’re all shrunk by the global ratio.
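
A minimal NumPy sketch of this rule; the arrays below are illustrative stand-ins for the gradient tensors, and none of the names are part of this API:

import numpy as np

t_list = [np.array([3.0, 4.0]), np.array([1.0, 2.0, 2.0])]
clip_norm = 2.0

# global_norm = sqrt(sum_i l2norm(t_list[i])^2) = sqrt(5^2 + 3^2)
global_norm = np.sqrt(sum(np.sum(t * t) for t in t_list))  # ~5.831

# Every tensor is scaled by the same ratio, so relative magnitudes are kept.
scale = clip_norm / max(global_norm, clip_norm)
clipped = [t * scale for t in t_list]
print(np.sqrt(sum(np.sum(t * t) for t in clipped)))  # 2.0 == clip_norm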

Parameters
  • clip_norm (float) – The maximum norm value.

  • group_name (str, optional) – The group name for this clip.

Examples

import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle

place = core.CPUPlace()
prog = fluid.framework.Program()
startup_program = fluid.framework.Program()
with fluid.program_guard(
        main_program=prog, startup_program=startup_program):
    image = fluid.layers.data(name='x', shape=[784], dtype='float32')
    label = fluid.layers.data(name='y', shape=[1], dtype='int64')
    hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
    hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
    predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(cost)

prog_clip = prog.clone()
avg_cost_clip = prog_clip.block(0).var(avg_cost.name)

p_g = fluid.backward.append_backward(loss=avg_cost)
p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)

with fluid.program_guard(main_program=prog_clip, startup_program=startup_program):
    fluid.clip.set_gradient_clip(
        fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
    p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)

grad_list = [elem[1] for elem in p_g]
grad_clip_list = [elem[1] for elem in p_g_clip]

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=128)

exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
exe.run(startup_program)

count = 0
for data in train_reader():
    count += 1
    print("count:%s" % count)
    if count > 5:
        break
    out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list)
    out_clip = exe.run(prog_clip,
                       feed=feeder.feed(data),
                       fetch_list=grad_clip_list)

GradientClipByNorm

class paddle.fluid.clip.GradientClipByNorm(clip_norm)[source]

Converts the input multidimensional Tensor \(X\) into a multidimensional Tensor whose L2 norm does not exceed the given maximum norm value ( \(clip\_norm\) ).

The tensor is not passed to this class directly; it is supplied through the main_program parameter of fluid.program_guard (see the Examples below).

This class limits the L2 norm of the input \(X\) to at most \(clip\_norm\):

\[Out = \begin{cases} X, & \text{if } norm(X) \leq clip\_norm \\ \frac{clip\_norm \cdot X}{norm(X)}, & \text{if } norm(X) > clip\_norm \end{cases}\]

where \(norm(X)\) represents the L2 norm of \(X\).

\[norm(X) = \left( \sum_{i=1}^{n} |x_i|^2 \right)^{\frac{1}{2}}\]
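
A minimal NumPy sketch of this per-tensor rule (the values are illustrative only, not part of this API):

import numpy as np

X = np.array([3.0, 4.0])  # norm(X) = 5.0
clip_norm = 2.0

norm = np.sqrt(np.sum(X * X))
# Rescale only when the L2 norm exceeds clip_norm; otherwise X is unchanged.
out = X if norm <= clip_norm else X * (clip_norm / norm)
print(out)  # [1.2 1.6], whose L2 norm is exactly clip_norm
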
Parameters
  • clip_norm (float) – The maximum norm value.

Examples

import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle
place = core.CPUPlace()
prog = fluid.framework.Program()
startup_program = fluid.framework.Program()
with fluid.program_guard(
            main_program=prog, startup_program=startup_program):
    image = fluid.data(name='x', shape=[None, 784], dtype='float32', lod_level=0)
    label = fluid.data(name='y', shape=[None, 1], dtype='int64', lod_level=0)
    hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
    hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
    predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(cost)
prog_clip = prog.clone()
avg_cost_clip = prog_clip.block(0).var(avg_cost.name)
p_g = fluid.backward.append_backward(loss=avg_cost)
p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)
with fluid.program_guard(main_program=prog_clip, startup_program=startup_program):
    fluid.clip.set_gradient_clip(
        fluid.clip.GradientClipByNorm(clip_norm=2.0))
    p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
grad_list = [elem[1] for elem in p_g]
grad_clip_list = [elem[1] for elem in p_g_clip]
train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=128)

exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
exe.run(startup_program)

count = 0
for data in train_reader():
    count += 1
    print("count:%s" % count)
    if count > 5:
        break
    out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list)
    out_clip = exe.run(prog_clip,
                       feed=feeder.feed(data),
                       fetch_list=grad_clip_list)

GradientClipByValue

class paddle.fluid.clip.GradientClipByValue(max, min=None)[source]

Clips gradient values to the range [min, max].

Given a tensor t, this operation clips its values to the range [min, max] in place.

  • Any values less than min are set to min.

  • Any values greater than max are set to max.
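
A minimal NumPy sketch of this rule, including the default min = -max described under Parameters below (values are illustrative only):

import numpy as np

grad = np.array([-5.0, -0.3, 0.0, 0.8, 3.0], dtype=np.float32)
max_val = 1.0
min_val = -max_val  # the documented default when min is omitted
print(np.clip(grad, min_val, max_val))  # [-1.  -0.3  0.   0.8  1. ]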

Parameters
  • max (float) – The maximum value to clip by.

  • min (float, optional) – The minimum value to clip by. If not set by the user, it will be set to -max by the framework.

Examples

import paddle.fluid as fluid
w_param_attrs = fluid.ParamAttr(name=None,
  initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
  learning_rate=1.0,
  regularizer=fluid.regularizer.L1Decay(1.0),
  trainable=True,
  gradient_clip=fluid.clip.GradientClipByValue(max=1.0, min=-1.0))
x = fluid.layers.data(name='x', shape=[10], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
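
With this attribute in place, the gradient of the fc layer's weight parameter should be clamped elementwise to [-1.0, 1.0] when an optimizer is applied to the program.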