Hello,
I’ve downloaded both the notebooks and the data on my local computer to dig into the data and the pytorch functions.
While trying to replicate the notebooks I ran into an error that I don’t understand.
Here’s my code (~same as the lab notebook).
Imports
import os
import tarfile
import requests
import scipy
from tqdm import tqdm
from PIL import Image
from torch.utils.data import Dataset
Data download
data_dir = "./flower_data"
img_folder_path = os.path.join(data_dir, 'jpg')
labels_file_path = os.path.join(data_dir, 'imagelabels.mat')
tgz_path = os.path.join(data_dir, '102flowers.tgz')
os.makedirs(data_dir, exist_ok=True)
image_url = "https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz"
labels_url = "https://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat"
response = requests.get(image_url, stream=True)
total_size = int(response.headers.get("content-length", 0))
with open(tgz_path, "wb") as file:
for data in tqdm(
response.iter_content(chunk_size=1024),
total=total_size // 1024,
):
file.write(data)
with tarfile.open(tgz_path, "r:gz") as tar:
tar.extractall(data_dir)
response = requests.get(labels_url)
with open(labels_file_path, 'wb') as file:
file.write(response.content)
FlowerDataset definition
class FlowerDataset(Dataset):
def __init__(self, root_dir, tranform=None):
self.root_dir = root_dir
self.tranform = tranform
self.img_dir = os.path.join(self.root_dir, "jpg")
self.labels = self.load_and_correct_labels()
def __len__(self):
return len(self.labels)
def __getitem__(self, idx):
image = self.retrieve_image(idx)
if self.tranform:
image = self.tranform(image)
label = self.labels[idx]
return image, label
def retrieve_image(self, idx):
img_name = f"image_{idx+1:05d}.jpg"
img_path = os.path.join(self.img_dir, img_name)
with Image.open(img_path) as img:
image = img.convert("RGB")
return image
def load_and_correct_labels(self):
self.labels_mat = scipy.io.loadmat(
os.path.join(self.root_dir, 'imagelabels.mat')
)
labels = self.labels_mat['labels'][0] - 1
return labels
Loop through the dataset
dataset = FlowerDataset(data_dir)
for _ in dataset:
pass
When I do this I get the following error:
[Errno 2] No such file or directory: ‘./flower_data\jpg\image_08190.jpg’
I’ve checked the length of the dataset object and it is 8189
print(len(dataset))
# 8189
→ But, it seems that the iteration goes from 0 to 8189 instead of 8188.
I tried a few AIs to troubleshoot but they did not seem to find where it comes from. They ended up by giving me this to avoid the error:
def __getitem__(self, idx):
if idx >= len(self.labels):
raise IndexError(f"Index {idx} out of range for dataset of length {len(self.labels)}")
image = self.retrieve_image(idx)
if self.tranform:
image = self.tranform(image)
label = self.labels[idx]
return image, label
Is there something I’m missing ?
