Thanks so much for this forum, it's helping me to learn so much! I will try to answer a few questions as well. As I am still a newbie, my question is below.
I am trying to display an image from a deep learning pipeline together with its associated label. The error I am getting says that the image is too large in pixel size. I am guessing that the labels may be loading incorrectly, or that somewhere in the pipeline the image is not sized correctly. I think this because when I try to do something like:
# CaptioningData.collate_fn returns three items per batch (images, padded
# targets, lengths). Assuming trn_dl was built with that collate_fn (TODO
# confirm), unpacking a batch into only two names is what raises
# "ValueError: too many values to unpack (expected 2)".
# The names are also made consistent: the original assigned `label` but then
# printed `train_labels`.
train_features, train_labels, lengths = next(iter(trn_dl))
print(train_features, train_labels)
It throws an error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-56-9b9b1ca99075> in <module>
1 ##documentation: https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
2
----> 3 train_features, labels = next(iter(trn_dl))
4
5 print(train_features, train_labels)
ValueError: too many values to unpack (expected 2)
However, I also get an error when I run the code below with directories that have multiple levels. For example, if I use a directory such as a/images it works okay, but if I use a directory such as a/b/c/images, I get this type of error.
Here’s my data loading class and data loader (which seems to work and does not throw an error):
class CaptioningData(Dataset):
    """Dataset yielding ``(PIL image, token-id tensor, raw caption)`` triples.

    PNG files are located by walking ``root`` recursively and are indexed by
    their file name without extension.  Each row of ``df`` must provide an
    ``image_id`` (matching such a file name) and an ``InChI`` caption.

    Images are returned untransformed by ``__getitem__``; the torchvision
    transform pipeline is applied per batch inside ``collate_fn``.
    """

    def __init__(self, root, df, vocab):
        """Index the PNG files under ``root`` and build the image transform.

        Args:
            root: Directory that is walked recursively for ``.png`` files.
            df: DataFrame with ``image_id`` and ``InChI`` columns.
            vocab: Object exposing a ``stoi`` mapping from token to integer
                id, including the ``'<start>'`` and ``'<end>'`` tokens.
        """
        self.df = df.reset_index(drop=True)
        self.root = root
        self.vocab = vocab
        # image id (file stem) -> full path.  os.walk descends into every
        # sub-directory; if two files in different folders share a stem, the
        # one visited last wins.
        self.id_to_path = {}
        for dirpath, _, filenames in os.walk(self.root):
            for filename in filenames:
                image_id, ext = os.path.splitext(filename)
                if ext == '.png':
                    self.id_to_path[image_id] = os.path.join(dirpath, filename)
        self.transform = transforms.Compose([
            transforms.Resize(224),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225)),
        ])

    def _encode_caption(self, caption):
        """Return ``caption`` as a 1-D long tensor of vocabulary ids.

        The caption is split on whitespace and wrapped in the ``'<start>'``
        and ``'<end>'`` ids.  The instance's own ``self.vocab`` is used, not a
        module-level ``vocab`` global.

        Raises:
            KeyError: If a token is missing from ``self.vocab.stoi``.
        """
        stoi = self.vocab.stoi
        # NOTE: a caption without whitespace becomes a single token here (the
        # author flagged this as a problem for InChI captions).
        token_ids = [stoi['<start>']]
        token_ids.extend(stoi[token] for token in caption.split())
        token_ids.append(stoi['<end>'])
        # Build the integer tensor directly; going through a float tensor
        # first could round very large ids.
        return torch.tensor(token_ids, dtype=torch.long)

    def __getitem__(self, idx):
        """Return one data triple ``(image, target, caption)`` for row ``idx``.

        Raises:
            KeyError: If the row's ``image_id`` has no PNG under ``root``.
        """
        row = self.df.iloc[idx].squeeze()
        image_id = row.image_id
        image = Image.open(self.id_to_path[image_id]).convert('RGB')
        caption = row.InChI  # caption belonging to this image_id
        target = self._encode_caption(caption)
        return image, target, caption

    def choose(self):
        """Return the item at a uniformly random index (debug helper)."""
        return self[np.random.randint(len(self))]

    def __len__(self):
        """Return the number of rows in ``df``."""
        return len(self.df)

    def collate_fn(self, data):
        """Collate ``(image, target, caption)`` triples into a padded batch.

        Returns three tensors moved to the module-level ``device``: the
        stacked transformed images, the zero-padded targets, and the target
        lengths.  Samples are ordered by decreasing target length.
        """
        # sorted() leaves the caller's list untouched (list.sort() would
        # reorder it in place).
        data = sorted(data, key=lambda sample: len(sample[1]), reverse=True)
        images, targets, captions = zip(*data)
        images = torch.stack([self.transform(image) for image in images], 0)
        lengths = [len(tar) for tar in targets]
        _targets = torch.zeros(len(captions), max(lengths)).long()
        for i, tar in enumerate(targets):
            end = lengths[i]
            _targets[i, :end] = tar[:end]
        return (images.to(device),
                _targets.to(device),
                torch.tensor(lengths).long().to(device))
However, at the end of my pipeline I use the following code to retrieve and display the image and its label, and I get the error shown after the code.
def load_image(image_path, transform=True):
    """Load an image as 224x224 RGB and optionally transform it.

    Args:
        image_path: Path of the image file to open.
        transform: Callable applied to the resized PIL image.  Any
            non-callable value (``None``, or the ``True`` debug default)
            means "no transform".  Previously ``True`` was called like a
            function and ``None`` left ``tfm_image`` undefined.

    Returns:
        ``(image, tfm_image)``: the resized PIL image, and
        ``transform(image)[None]`` (the transform result with a leading axis
        added), or ``None`` when no callable transform was given.
    """
    # The context manager closes the file handle once the pixel data has been
    # copied by convert().
    with Image.open(image_path) as source:
        image = source.convert('RGB')
    image = image.resize((224, 224), Image.LANCZOS)
    tfm_image = transform(image)[None] if callable(transform) else None
    return image, tfm_image
@torch.no_grad()
def load_image_and_predict(image_path, *, title_width=50, max_title_lines=6):
    """Predict a caption for one image, display it, and return the caption.

    Args:
        image_path: Path of the image file to caption.
        title_width: Maximum characters per line of the displayed title.
        max_title_lines: Maximum title lines; longer captions are cut off
            with a trailing `` ...`` in the title only.

    Returns:
        The full, untruncated predicted sentence (``decoder.predict(...)[0]``).
    """
    import textwrap  # local import: only needed for formatting the title

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225)),
    ])
    org_image, tfm_image = load_image(image_path, transform)
    image_tensor = tfm_image.to(device)
    encoder.eval()
    decoder.eval()
    feature = encoder(image_tensor)
    sentence = decoder.predict(feature)[0]
    # NOTE(review): the image is already 224x224, so the posted
    # "Image size of 67861x302 pixels is too large" error is presumably caused
    # by an extremely long single-line title stretching the figure when it is
    # rendered -- confirm that `show` draws `title` via matplotlib.  Wrapping
    # and truncating the title keeps the figure bounded.  Long tokens without
    # whitespace are broken across lines as well.
    title = textwrap.fill(str(sentence), width=title_width,
                          max_lines=max_title_lines, placeholder=' ...')
    show(org_image, title=title)
    return sentence
# Alternative input for debugging (the test set should have the same number
# of files and folders as the train set):
# files = Glob(DEBUG_TRAIN)
files = Glob(DEBUG_TEST)  # DEBUG_TEST is instantiated at the start of the code
"""Loop through all files in the pathway, load images and predict captions"""
# Try this later if putting files in one directory doesn't help.
# Five iterations only: each one captions whichever entry choose(files)
# returns (presumably a random pick -- verify against its definition).
for _ in range(5):
    load_image_and_predict(choose(files))
Errors it throws are below:
ValueError Traceback (most recent call last)
/opt/conda/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
339 pass
340 else:
--> 341 return printer(obj)
342 # Finally look for special method names
343 method = get_real_method(obj, self.print_method)
/opt/conda/lib/python3.8/site-packages/IPython/core/pylabtools.py in <lambda>(fig)
248
249 if 'png' in formats:
--> 250 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
251 if 'retina' in formats or 'png2x' in formats:
252 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
/opt/conda/lib/python3.8/site-packages/IPython/core/pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs)
132 FigureCanvasBase(fig)
133
--> 134 fig.canvas.print_figure(bytes_io, **kw)
135 data = bytes_io.getvalue()
136 if fmt == 'svg':
/opt/conda/lib/python3.8/site-packages/matplotlib/backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
2253 # force the figure dpi to 72), so we need to set it again here.
2254 with cbook._setattr_cm(self.figure, dpi=dpi):
-> 2255 result = print_method(
2256 filename,
2257 facecolor=facecolor,
/opt/conda/lib/python3.8/site-packages/matplotlib/backend_bases.py in wrapper(*args, **kwargs)
1667 kwargs.pop(arg)
1668
-> 1669 return func(*args, **kwargs)
1670
1671 return wrapper
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args)
506 *metadata*, including the default 'Software' key.
507 """
--> 508 FigureCanvasAgg.draw(self)
509 mpl.image.imsave(
510 filename_or_obj, self.buffer_rgba(), format="png", origin="upper",
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in draw(self)
399 def draw(self):
400 # docstring inherited
--> 401 self.renderer = self.get_renderer(cleared=True)
402 # Acquire a lock on the shared font cache.
403 with RendererAgg.lock, \
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in get_renderer(self, cleared)
415 and getattr(self, "_lastKey", None) == key)
416 if not reuse_renderer:
--> 417 self.renderer = RendererAgg(w, h, self.figure.dpi)
418 self._lastKey = key
419 elif cleared:
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in __init__(self, width, height, dpi)
89 self.width = width
90 self.height = height
---> 91 self._renderer = _RendererAgg(int(width), int(height), dpi)
92 self._filter_renderers = []
93
ValueError: Image size of 67861x302 pixels is too large. It must be less than 2^16 in each direction.
<Figure size 360x360 with 1 Axes>
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/opt/conda/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
339 pass
340 else:
--> 341 return printer(obj)
342 # Finally look for special method names
343 method = get_real_method(obj, self.print_method)
/opt/conda/lib/python3.8/site-packages/IPython/core/pylabtools.py in <lambda>(fig)
248
249 if 'png' in formats:
--> 250 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
251 if 'retina' in formats or 'png2x' in formats:
252 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
/opt/conda/lib/python3.8/site-packages/IPython/core/pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs)
132 FigureCanvasBase(fig)
133
--> 134 fig.canvas.print_figure(bytes_io, **kw)
135 data = bytes_io.getvalue()
136 if fmt == 'svg':
/opt/conda/lib/python3.8/site-packages/matplotlib/backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
2253 # force the figure dpi to 72), so we need to set it again here.
2254 with cbook._setattr_cm(self.figure, dpi=dpi):
-> 2255 result = print_method(
2256 filename,
2257 facecolor=facecolor,
/opt/conda/lib/python3.8/site-packages/matplotlib/backend_bases.py in wrapper(*args, **kwargs)
1667 kwargs.pop(arg)
1668
-> 1669 return func(*args, **kwargs)
1670
1671 return wrapper
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args)
506 *metadata*, including the default 'Software' key.
507 """
--> 508 FigureCanvasAgg.draw(self)
509 mpl.image.imsave(
510 filename_or_obj, self.buffer_rgba(), format="png", origin="upper",
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in draw(self)
399 def draw(self):
400 # docstring inherited
--> 401 self.renderer = self.get_renderer(cleared=True)
402 # Acquire a lock on the shared font cache.
403 with RendererAgg.lock, \
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in get_renderer(self, cleared)
415 and getattr(self, "_lastKey", None) == key)
416 if not reuse_renderer:
--> 417 self.renderer = RendererAgg(w, h, self.figure.dpi)
418 self._lastKey = key
419 elif cleared:
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in __init__(self, width, height, dpi)
89 self.width = width
90 self.height = height
---> 91 self._renderer = _RendererAgg(int(width), int(height), dpi)
92 self._filter_renderers = []
93
ValueError: Image size of 70653x302 pixels is too large. It must be less than 2^16 in each direction.
<Figure size 360x360 with 1 Axes>
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/opt/conda/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
339 pass
340 else:
--> 341 return printer(obj)
342 # Finally look for special method names
343 method = get_real_method(obj, self.print_method)
/opt/conda/lib/python3.8/site-packages/IPython/core/pylabtools.py in <lambda>(fig)
248
249 if 'png' in formats:
--> 250 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
251 if 'retina' in formats or 'png2x' in formats:
252 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
/opt/conda/lib/python3.8/site-packages/IPython/core/pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs)
132 FigureCanvasBase(fig)
133
--> 134 fig.canvas.print_figure(bytes_io, **kw)
135 data = bytes_io.getvalue()
136 if fmt == 'svg':
/opt/conda/lib/python3.8/site-packages/matplotlib/backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
2253 # force the figure dpi to 72), so we need to set it again here.
2254 with cbook._setattr_cm(self.figure, dpi=dpi):
-> 2255 result = print_method(
2256 filename,
2257 facecolor=facecolor,
/opt/conda/lib/python3.8/site-packages/matplotlib/backend_bases.py in wrapper(*args, **kwargs)
1667 kwargs.pop(arg)
1668
-> 1669 return func(*args, **kwargs)
1670
1671 return wrapper
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args)
506 *metadata*, including the default 'Software' key.
507 """
--> 508 FigureCanvasAgg.draw(self)
509 mpl.image.imsave(
510 filename_or_obj, self.buffer_rgba(), format="png", origin="upper",
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in draw(self)
399 def draw(self):
400 # docstring inherited
--> 401 self.renderer = self.get_renderer(cleared=True)
402 # Acquire a lock on the shared font cache.
403 with RendererAgg.lock, \
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in get_renderer(self, cleared)
415 and getattr(self, "_lastKey", None) == key)
416 if not reuse_renderer:
--> 417 self.renderer = RendererAgg(w, h, self.figure.dpi)
418 self._lastKey = key
419 elif cleared:
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in __init__(self, width, height, dpi)
89 self.width = width
90 self.height = height
---> 91 self._renderer = _RendererAgg(int(width), int(height), dpi)
92 self._filter_renderers = []
93
ValueError: Image size of 70653x302 pixels is too large. It must be less than 2^16 in each direction.
<Figure size 360x360 with 1 Axes>
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/opt/conda/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
339 pass
340 else:
--> 341 return printer(obj)
342 # Finally look for special method names
343 method = get_real_method(obj, self.print_method)
/opt/conda/lib/python3.8/site-packages/IPython/core/pylabtools.py in <lambda>(fig)
248
249 if 'png' in formats:
--> 250 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
251 if 'retina' in formats or 'png2x' in formats:
252 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
/opt/conda/lib/python3.8/site-packages/IPython/core/pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs)
132 FigureCanvasBase(fig)
133
--> 134 fig.canvas.print_figure(bytes_io, **kw)
135 data = bytes_io.getvalue()
136 if fmt == 'svg':
/opt/conda/lib/python3.8/site-packages/matplotlib/backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
2253 # force the figure dpi to 72), so we need to set it again here.
2254 with cbook._setattr_cm(self.figure, dpi=dpi):
-> 2255 result = print_method(
2256 filename,
2257 facecolor=facecolor,
/opt/conda/lib/python3.8/site-packages/matplotlib/backend_bases.py in wrapper(*args, **kwargs)
1667 kwargs.pop(arg)
1668
-> 1669 return func(*args, **kwargs)
1670
1671 return wrapper
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args)
506 *metadata*, including the default 'Software' key.
507 """
--> 508 FigureCanvasAgg.draw(self)
509 mpl.image.imsave(
510 filename_or_obj, self.buffer_rgba(), format="png", origin="upper",
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in draw(self)
399 def draw(self):
400 # docstring inherited
--> 401 self.renderer = self.get_renderer(cleared=True)
402 # Acquire a lock on the shared font cache.
403 with RendererAgg.lock, \
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in get_renderer(self, cleared)
415 and getattr(self, "_lastKey", None) == key)
416 if not reuse_renderer:
--> 417 self.renderer = RendererAgg(w, h, self.figure.dpi)
418 self._lastKey = key
419 elif cleared:
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in __init__(self, width, height, dpi)
89 self.width = width
90 self.height = height
---> 91 self._renderer = _RendererAgg(int(width), int(height), dpi)
92 self._filter_renderers = []
93
ValueError: Image size of 70653x302 pixels is too large. It must be less than 2^16 in each direction.
<Figure size 360x360 with 1 Axes>
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/opt/conda/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
339 pass
340 else:
--> 341 return printer(obj)
342 # Finally look for special method names
343 method = get_real_method(obj, self.print_method)
/opt/conda/lib/python3.8/site-packages/IPython/core/pylabtools.py in <lambda>(fig)
248
249 if 'png' in formats:
--> 250 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
251 if 'retina' in formats or 'png2x' in formats:
252 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
/opt/conda/lib/python3.8/site-packages/IPython/core/pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs)
132 FigureCanvasBase(fig)
133
--> 134 fig.canvas.print_figure(bytes_io, **kw)
135 data = bytes_io.getvalue()
136 if fmt == 'svg':
/opt/conda/lib/python3.8/site-packages/matplotlib/backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
2253 # force the figure dpi to 72), so we need to set it again here.
2254 with cbook._setattr_cm(self.figure, dpi=dpi):
-> 2255 result = print_method(
2256 filename,
2257 facecolor=facecolor,
/opt/conda/lib/python3.8/site-packages/matplotlib/backend_bases.py in wrapper(*args, **kwargs)
1667 kwargs.pop(arg)
1668
-> 1669 return func(*args, **kwargs)
1670
1671 return wrapper
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args)
506 *metadata*, including the default 'Software' key.
507 """
--> 508 FigureCanvasAgg.draw(self)
509 mpl.image.imsave(
510 filename_or_obj, self.buffer_rgba(), format="png", origin="upper",
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in draw(self)
399 def draw(self):
400 # docstring inherited
--> 401 self.renderer = self.get_renderer(cleared=True)
402 # Acquire a lock on the shared font cache.
403 with RendererAgg.lock, \
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in get_renderer(self, cleared)
415 and getattr(self, "_lastKey", None) == key)
416 if not reuse_renderer:
--> 417 self.renderer = RendererAgg(w, h, self.figure.dpi)
418 self._lastKey = key
419 elif cleared:
/opt/conda/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in __init__(self, width, height, dpi)
89 self.width = width
90 self.height = height
---> 91 self._renderer = _RendererAgg(int(width), int(height), dpi)
92 self._filter_renderers = []
93
ValueError: Image size of 67861x302 pixels is too large. It must be less than 2^16 in each direction.
<Figure size 360x360 with 1 Axes>
Any suggestions and tips are greatly appreciated. Again, I’m pretty confident about the CaptioningData class, just not the final image and label retrieval. I get an error saying the image is too large even though I’m resizing the image before trying to retrieve the image and label.