DatasetFolder¶
- class paddle.vision.datasets. DatasetFolder ( root, loader=None, extensions=None, transform=None, is_valid_file=None ) [source]
- 
         A generic data loader where the samples are arranged in this way:

             root/class_a/1.ext
             root/class_a/2.ext
             root/class_a/3.ext
             root/class_b/123.ext
             root/class_b/456.ext
             root/class_b/789.ext

         - Parameters
- 
           - root (str) – Root directory path. 
- loader (Callable, optional) – A function to load a sample given its path. Default: None. 
- extensions (list[str]|tuple[str], optional) – A list of allowed extensions. `extensions` and `is_valid_file` should not both be passed. If this value is not set, the default is to use (‘.jpg’, ‘.jpeg’, ‘.png’, ‘.ppm’, ‘.bmp’, ‘.pgm’, ‘.tif’, ‘.tiff’, ‘.webp’). Default: None.
- transform (Callable, optional) – A function/transform that takes in a sample and returns a transformed version. Default: None. 
- is_valid_file (Callable, optional) – A function that takes the path of a file and checks whether it is a valid file. `extensions` and `is_valid_file` should not both be passed. Default: None.
 
- Returns
- 
           Dataset. An instance of DatasetFolder. 
 - classes
- 
           List of the class names. - Type
- 
             list[str] 
 
 - class_to_idx
- 
           Dict with items (class_name, class_index). - Type
- 
             dict[str, int] 
 
 - samples
- 
           List of (sample_path, class_index) tuples. - Type
- 
             list[tuple[str, int]] 
 
 - targets
- 
           The class_index value for each image in the dataset. - Type
- 
             list[int] 
 
 # Example
import shutil
import tempfile
from pathlib import Path

import cv2
import numpy as np

import paddle.vision.transforms as T
from paddle.vision.datasets import DatasetFolder


def make_fake_file(img_path: str):
    """Create a placeholder file at ``img_path`` based on its extension.

    Paths ending in ``.jpg``, ``.png`` or ``.jpeg`` get a random 32x32
    3-channel uint8 image written with OpenCV; paths ending in ``.txt`` get
    a short text file. Any other extension is silently skipped.
    """
    if img_path.endswith((".jpg", ".png", ".jpeg")):
        fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
        cv2.imwrite(img_path, fake_img)
    elif img_path.endswith(".txt"):
        with open(img_path, "w") as f:
            f.write("This is a fake file.")


def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
    """Recursively materialize ``directory_hierarchy`` underneath ``root``.

    Args:
        root: Directory to populate; created (with parents) if missing.
        directory_hierarchy: A list whose items are either a file name
            (``str``) or a single-key dict mapping a sub-directory name to
            its own nested hierarchy list.
        file_maker: Callable invoked with the string path of every file
            entry. Defaults to ``make_fake_file``.
    """
    root = Path(root)
    root.mkdir(parents=True, exist_ok=True)
    for subpath in directory_hierarchy:
        if isinstance(subpath, str):
            file_maker(str(root / subpath))
        else:
            # Each dict entry describes exactly one sub-directory.
            dirname = next(iter(subpath))
            # Forward file_maker so nested directories honor a custom maker
            # instead of silently falling back to the default.
            make_directory(root / dirname, subpath[dirname], file_maker)


directory_hierarchy = [
    {"class_0": [
        "abc.jpg",
        "def.png"]},
    {"class_1": [
        "ghi.jpeg",
        "jkl.png",
        {"mno": [
            "pqr.jpeg",
            "stu.jpg"]}]},
    "this_will_be_ignored.txt",
]

# You can replace this with any directory to explore the structure
# of generated data. e.g. fake_data_dir = "./temp_dir"
# (the sample paths shown in the comments below use "./temp_dir").
fake_data_dir = tempfile.mkdtemp()

try:
    make_directory(fake_data_dir, directory_hierarchy)

    data_folder_1 = DatasetFolder(fake_data_dir)
    print(data_folder_1.classes)
    # ['class_0', 'class_1']
    print(data_folder_1.class_to_idx)
    # {'class_0': 0, 'class_1': 1}
    print(data_folder_1.samples)
    # [('./temp_dir/class_0/abc.jpg', 0), ('./temp_dir/class_0/def.png', 0),
    #  ('./temp_dir/class_1/ghi.jpeg', 1), ('./temp_dir/class_1/jkl.png', 1),
    #  ('./temp_dir/class_1/mno/pqr.jpeg', 1), ('./temp_dir/class_1/mno/stu.jpg', 1)]
    print(data_folder_1.targets)
    # [0, 0, 1, 1, 1, 1]
    print(len(data_folder_1))
    # 6

    # Index-based access, demonstrating __len__ and __getitem__.
    for i in range(len(data_folder_1)):
        img, label = data_folder_1[i]
        # do something with img and label
        print(type(img), img.size, label)
        # <class 'PIL.Image.Image'> (32, 32) 0

    transform = T.Compose(
        [
            T.Resize(64),
            T.ToTensor(),
            T.Normalize(
                mean=[0.5, 0.5, 0.5],
                std=[0.5, 0.5, 0.5],
                to_rgb=True,
            ),
        ]
    )

    data_folder_2 = DatasetFolder(
        fake_data_dir,
        loader=cv2.imread,  # load image with OpenCV
        extensions=(".jpg",),  # only load *.jpg files
        transform=transform,  # apply transform to every image
    )
    print([img_path for img_path, label in data_folder_2.samples])
    # ['./temp_dir/class_0/abc.jpg', './temp_dir/class_1/mno/stu.jpg']
    print(len(data_folder_2))
    # 2

    for img, label in iter(data_folder_2):
        # do something with img and label
        print(type(img), img.shape, label)
        # <class 'paddle.Tensor'> [3, 64, 64] 0
finally:
    # Always remove the temporary tree, even if a step above raised.
    shutil.rmtree(fake_data_dir)
