ImageFolder

class paddle.vision.datasets.ImageFolder(root, loader=None, extensions=None, transform=None, is_valid_file=None) [source]

A generic data loader where the samples are arranged in this way:

root/1.ext
root/2.ext
root/sub_dir/3.ext
Parameters
  • root (str) – Root directory path.

  • loader (Callable, optional) – A function to load a sample given its path. Default: None.

  • extensions (list[str]|tuple[str], optional) – A list or tuple of allowed file extensions. extensions and is_valid_file are mutually exclusive; do not pass both. If this value is not set, the default is ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.

  • transform (Callable, optional) – A function/transform that takes in a sample and returns a transformed version. Default: None.

  • is_valid_file (Callable, optional) – A function that takes the path of a file and checks whether it is a valid file. extensions and is_valid_file are mutually exclusive; do not pass both. Default: None.

Returns

Dataset. An instance of ImageFolder.

samples

List of sample paths.

Type

list[str]

Example

>>> import shutil
>>> import tempfile
>>> import cv2
>>> import numpy as np
>>> import paddle.vision.transforms as T
>>> from pathlib import Path
>>> from paddle.vision.datasets import ImageFolder


>>> def make_fake_file(img_path: str):
...     if img_path.endswith((".jpg", ".png", ".jpeg")):
...         fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
...         cv2.imwrite(img_path, fake_img)
...     elif img_path.endswith(".txt"):
...         with open(img_path, "w") as f:
...             f.write("This is a fake file.")

>>> def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
...     root = Path(root)
...     root.mkdir(parents=True, exist_ok=True)
...     for subpath in directory_hierarchy:
...         if isinstance(subpath, str):
...             filepath = root / subpath
...             file_maker(str(filepath))
...         else:
...             dirname = list(subpath.keys())[0]
...             make_directory(root / dirname, subpath[dirname])

>>> directory_hierarchy = [
...     "abc.jpg",
...     "def.png",
...     {"ghi": [
...         "jkl.jpeg",
...         {"mno": [
...             "pqr.jpg"]}]},
...     "this_will_be_ignored.txt",
... ]

>>> # You can replace this with any directory to explore the structure
>>> # of generated data. e.g. fake_data_dir = "./temp_dir"
>>> fake_data_dir = tempfile.mkdtemp()
>>> make_directory(fake_data_dir, directory_hierarchy)
>>> image_folder_1 = ImageFolder(fake_data_dir)
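>>> # The paths below are what you get with fake_data_dir = "./temp_dir";
>>> # a directory created by tempfile.mkdtemp() will have a different prefix.
>>> print(image_folder_1.samples)
['./temp_dir/abc.jpg', './temp_dir/def.png',
 './temp_dir/ghi/jkl.jpeg', './temp_dir/ghi/mno/pqr.jpg']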
>>> print(len(image_folder_1))
4

>>> for i in range(len(image_folder_1)):
...     (img,) = image_folder_1[i]
...     # do something with img
...     print(type(img), img.size)
...     # <class 'PIL.Image.Image'> (32, 32)


>>> transform = T.Compose(
...     [
...         T.Resize(64),
...         T.ToTensor(),
...         T.Normalize(
...             mean=[0.5, 0.5, 0.5],
...             std=[0.5, 0.5, 0.5],
...             to_rgb=True,
...         ),
...     ]
... )

>>> image_folder_2 = ImageFolder(
...     fake_data_dir,
...     loader=lambda x: cv2.imread(x),  # load image with OpenCV
...     extensions=(".jpg",),  # only load *.jpg files
...     transform=transform,  # apply transform to every image
... )

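>>> # Paths shown are for fake_data_dir = "./temp_dir"; a directory created
>>> # by tempfile.mkdtemp() will have a different prefix.
>>> print(image_folder_2.samples)
['./temp_dir/abc.jpg', './temp_dir/ghi/mno/pqr.jpg']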
>>> print(len(image_folder_2))
2

>>> for (img,) in iter(image_folder_2):
...     # do something with img
...     print(type(img), img.shape)
...     # <class 'paddle.Tensor'> [3, 64, 64]

>>> shutil.rmtree(fake_data_dir)