class paddle.text.Imdb(data_file=None, mode='train', cutoff=150, download=True) [source]

Implementation of IMDB dataset.

  • data_file (str) – path to the data tar file; may be None if download is True. Default None.

  • mode (str) – 'train' or 'test' mode. Default 'train'.

  • cutoff (int) – cutoff number for building word dictionary. Default 150.

  • download (bool) – whether to download dataset automatically if data_file is not set. Default True


Returns

instance of IMDB dataset

Return type

Dataset



import paddle
from paddle.text.datasets import Imdb

class SimpleNet(paddle.nn.Layer):
    """Toy layer used to show an IMDB sample passing through a model."""

    def __init__(self):
        super().__init__()

    def forward(self, doc, label):
        """Return ``paddle.sum(doc)`` paired with ``label``, which is passed through untouched."""
        doc_total = paddle.sum(doc)
        return doc_total, label

# Load the training split. With data_file left unset and download at its
# default of True, the dataset archive is fetched automatically.
imdb = Imdb(mode='train')

# The layer is the same for every sample, so build it once instead of
# re-creating it on each loop iteration.
model = SimpleNet()

for i in range(10):
    doc, label = imdb[i]
    doc = paddle.to_tensor(doc)
    label = paddle.to_tensor(label)

    # forward() returns (paddle.sum(doc), label); the sum is not used below.
    doc_sum, label = model(doc, label)
    print(doc.numpy().shape, label.numpy().shape)