DataParallel

class paddle.fluid.dygraph.DataParallel(layers, strategy)

Run the dygraph module with data parallelism.

Currently, the DataParallel class only supports running the dynamic graph in multi-process mode. The usage is: python -m paddle.distributed.launch --selected_gpus=0,1 dynamic_graph_test.py, where dynamic_graph_test.py contains the code shown in the examples below.

Parameters
  • layers (Layer) – The module that should be executed by data parallel.

  • strategy (ParallelStrategy) – The strategy of data parallelism, contains environment configuration related to parallel execution.

Returns

The module wrapped for data parallelism.

Return type

Layer

Examples

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph.base import to_variable

place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
with fluid.dygraph.guard(place=place):

    # prepare the data parallel context
    strategy = dygraph.prepare_context()

    linear = Linear(1, 10, act="softmax")
    adam = AdamOptimizer()

    # make the module become the data parallelism module
    linear = dygraph.DataParallel(linear, strategy)

    x_data = np.random.random(size=[10, 1]).astype(np.float32)
    data = to_variable(x_data)

    hidden = linear(data)
    avg_loss = fluid.layers.mean(hidden)

    # scale the loss according to the number of trainers.
    avg_loss = linear.scale_loss(avg_loss)

    avg_loss.backward()

    # collect the gradients of trainers.
    linear.apply_collective_grads()

    adam.minimize(avg_loss)
    linear.clear_gradients()
forward(*inputs, **kwargs)

Defines the computation performed at every call. Should be overridden by all subclasses.

Parameters
  • *inputs (tuple) – unpacked tuple arguments

  • **kwargs (dict) – unpacked dict arguments
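
Calling the DataParallel instance dispatches to forward, which in turn runs the wrapped module's forward. A minimal sketch, following the class-level example above (the layer and data are illustrative):

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph.base import to_variable

place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
with fluid.dygraph.guard(place=place):
    strategy = dygraph.prepare_context()
    linear = dygraph.DataParallel(Linear(1, 10, act="softmax"), strategy)

    x_data = np.random.random(size=[10, 1]).astype(np.float32)
    # calling linear(...) invokes forward on the wrapped Linear layer
    hidden = linear(to_variable(x_data))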

scale_loss(loss)

Scale the loss. In data-parallel mode, the loss should be scaled by the number of trainers. If not in data-parallel mode, the loss is returned directly.
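
Conceptually, the scaling divides the loss by the trainer count taken from the parallel strategy. A minimal, framework-free sketch of that behavior (illustrative only, not the actual implementation):

# illustrative sketch of loss scaling in data-parallel mode
def scale_loss_sketch(loss, trainer_count):
    if trainer_count <= 1:
        # not running in data-parallel mode: return the loss unchanged
        return loss
    # average this trainer's loss contribution across all trainers
    return loss / trainer_count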

Parameters

loss (Variable) – The loss of the current Model.

Returns

the scaled loss.

Return type

Variable

Examples

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph.base import to_variable

place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
with fluid.dygraph.guard(place=place):
    strategy = dygraph.prepare_context()
    linear = Linear(1, 10, act="softmax")
    adam = AdamOptimizer()
    linear = dygraph.DataParallel(linear, strategy)

    x_data = np.random.random(size=[10, 1]).astype(np.float32)
    data = to_variable(x_data)
    hidden = linear(data)
    avg_loss = fluid.layers.mean(hidden)

    # scale the loss according to the number of trainers.
    avg_loss = linear.scale_loss(avg_loss)

    avg_loss.backward()
    linear.apply_collective_grads()

    adam.minimize(avg_loss)
    linear.clear_gradients()
apply_collective_grads()

AllReduce the parameters' gradients across trainers.
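
Conceptually, each parameter's gradient is combined across trainers with an allreduce, so every trainer ends up with the same combined gradient. A rough, framework-free sketch of the effect, assuming a sum-allreduce (illustrative only; the real method operates on the Paddle gradients in place):

# illustrative sketch: combine each parameter's gradient across trainers
def collect_grads_sketch(grads_per_trainer):
    # grads_per_trainer: one {param_name: gradient array} dict per trainer
    summed = {}
    for grads in grads_per_trainer:
        for name, grad in grads.items():
            summed[name] = summed.get(name, 0) + grad
    # after the allreduce, every trainer sees the same combined gradients
    return summed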

Examples

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph.base import to_variable

place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
with fluid.dygraph.guard(place=place):
    strategy = dygraph.prepare_context()
    linear = Linear(1, 10, act="softmax")
    adam = AdamOptimizer()
    linear = dygraph.DataParallel(linear, strategy)

    x_data = np.random.random(size=[10, 1]).astype(np.float32)
    data = to_variable(x_data)
    hidden = linear(data)
    avg_loss = fluid.layers.mean(hidden)
    avg_loss = linear.scale_loss(avg_loss)
    avg_loss.backward()

    # collect the gradients of trainers.
    linear.apply_collective_grads()

    adam.minimize(avg_loss)
    linear.clear_gradients()
state_dict(destination=None, include_sublayers=True, structured_name_prefix='')

Get all parameters of self._layers and its sub-layers, and set them into a dict.

Parameters
  • destination (dict, optional) – If provided, all the parameters will be set into this dict. Default: None

  • include_sublayers (bool, optional) – If true, also include the parameters from sublayers. Default: True

  • structured_name_prefix (str, optional) – If not an empty string, all keys in the state dict will start with structured_name_prefix. Default: ''

Returns

A dict containing all the parameters of self._layers.

Return type

dict

Examples

import paddle.fluid as fluid
with fluid.dygraph.guard():
    strategy = fluid.dygraph.prepare_context()
    emb = fluid.dygraph.Embedding([10, 10])
    emb = fluid.dygraph.DataParallel(emb, strategy)

    state_dict = emb.state_dict()
    fluid.save_dygraph(state_dict, "paddle_dy")
set_dict(stat_dict, include_sublayers=True, use_structured_name=True)

Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in the stat_dict.

Parameters
  • stat_dict (dict) – Dict containing all the parameters

  • include_sublayers (bool, optional) – If true, also include the parameters from sublayers. Default: True

  • use_structured_name (bool, optional) – If true, use structured name as key, otherwise, use parameter name as key. Default: True

Returns

None

Examples

import paddle.fluid as fluid
with fluid.dygraph.guard():
    strategy = fluid.dygraph.prepare_context()
    emb = fluid.dygraph.Embedding([10, 10])
    emb = fluid.dygraph.DataParallel(emb, strategy)

    state_dict = emb.state_dict()
    fluid.save_dygraph(state_dict, "paddle_dy")

    para_state_dict, _ = fluid.load_dygraph("paddle_dy")

    emb.set_dict(para_state_dict)
load_dict(stat_dict, include_sublayers=True, use_structured_name=True)

Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in the stat_dict.

This API will be deprecated. Please use set_dict instead.

Parameters
  • stat_dict (dict) – Dict containing all the parameters

  • include_sublayers (bool, optional) – If true, also include the parameters from sublayers. Default: True

  • use_structured_name (bool, optional) – If true, use structured name as key, otherwise, use parameter name as key. Default: True

Returns

None

Examples

import paddle.fluid as fluid
with fluid.dygraph.guard():
    strategy = fluid.dygraph.prepare_context()
    emb = fluid.dygraph.Embedding([10, 10])
    emb = fluid.dygraph.DataParallel(emb, strategy)

    state_dict = emb.state_dict()
    fluid.save_dygraph(state_dict, "paddle_dy")

    para_state_dict, _ = fluid.load_dygraph("paddle_dy")

    emb.load_dict(para_state_dict)