I’m currently trying to train a cnn on input and output images, which are grayscale csv.
Here is my code.
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import numpy as np
import pandas as pd
from PIL import Image
import io
# torch.manual_seed(1) # reproducible
# Hyper Parameters
EPOCH = 10
BATCH_SIZE = 256
LR = 0.005 # learning rate
DOWNLOAD_MNIST = False
N_TEST_IMG = 5
# torch.manual_seed(1) # reproducib
class MyDataset():
def __init__(self, csv_file,transform=None):
self.image_paths = pd.read_csv(csv_file, header = 0)
self.transform = transform
def __getitem__(self, index):
#print(self.image_paths[index])
#image_transformed = load_image(self.image_paths[index])
#print(index)
#print(self.image_paths.loc[[index]])
current = self.image_paths.iloc[index]
#print(current.shape)
#image_transformed = current.iloc[1]
image = Image.open(current.iloc[0])
image_transformed = Image.open(current.iloc[1])
#image, image_transformed = load_image(self.image_paths[index])
# transformations, e.g. Random Crop etc.
# Make sure to perform the same transformations on image and target
# Here is a small example: https://discuss.pytorch.org/t/torchvision-transfors-how-to-perform-identical-transform-on-both-image-and-target/10606/7?u=ptrblck
#x, y = TF.to_tensor(image), TF.to_tensor(image_transformed)
x = torch.from_numpy(np.array(image))
y = torch.from_numpy(np.array(image_transformed))
return x, y
def __len__(self):
return len(self.image_paths)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Data Loader for easy mini-batch return in training, the image batch shape will be (50, 1, 28, 28)
trainDataSet=MyDataset("./train.csv")
testDataSet=MyDataset("./test.csv")
train_loader = Data.DataLoader(trainDataSet, batch_size=BATCH_SIZE, shuffle=True)
test_loader = Data.DataLoader(testDataSet, batch_size=BATCH_SIZE, shuffle=True)
class Autoencoder(nn.Module):
def __init__(self):
super(Autoencoder,self).__init__()
self.encoder = nn.Sequential(
nn.Conv2d(1, 6, kernel_size=5),
nn.ReLU(True),
nn.Conv2d(6,16,kernel_size=5),
nn.ReLU(True))
self.decoder = nn.Sequential(
nn.ConvTranspose2d(16,6,kernel_size=5),
nn.ReLU(True),
nn.ConvTranspose2d(6,1,kernel_size=5),
nn.ReLU(True))
def forward(self,x):
x = self.encoder(x)
x = self.decoder(x)
return x
autoencoder = Autoencoder()
autoencoder.to(device)
print(autoencoder)
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LR)
loss_func = nn.MSELoss()
df = pd.DataFrame({'Epoch':[],'Train': [],
'Test':[]})
count = 1
for epoch in range(EPOCH):
for step, (x, y) in enumerate(train_loader):
autoencoder.train()
running_loss = 0.0
b_x = x.view(-1, 750*750).to(device) # batch x, shape (batch, 28*28)
b_y = y.view(-1, 900*800).to(device) # batch y, shape (batch, 28*28)
encoded, decoded = autoencoder(b_x)
loss = loss_func(decoded, b_y) # mean square error
optimizer.zero_grad() # clear gradients for this training step
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients
running_loss += loss.item()
if step % 100 == 0:
for step, (x, b_label) in enumerate(test_loader):
autoencoder.eval()
test_loss = 0.0
b_x = x.view(-1, 750*750).to(device) # batch x, shape (batch, 750*750)
b_y = x.view(-1, 900*800).to(device) # batch y, shape (batch, 900*800)
encoded, decoded = autoencoder(b_x)
loss = loss_func(decoded, b_y) # mean square error
optimizer.zero_grad() # clear gradients for this training step
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients
test_loss += loss.item()
print('Epoch: ', epoch + 1, '| train loss: %.4f' % running_loss, '| test loss: %.4f' % test_loss)
df.loc[count] = [epoch + 1,running_loss, test_loss]
count = count + 1
torch.save(autoencoder.state_dict(), "./model.pt")
df.to_csv("./train-test-loss.csv", index = False)
However, I’m getting the error:
Traceback (most recent call last):
File "train_depth_map.py", line 107, in <module>
encoded, decoded = autoencoder(b_x)
File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "train_depth_map.py", line 83, in forward
x = self.encoder(x)
File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\container.py", line 117, in forward
input = module(input)
File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\conv.py", line 419, in forward
return self._conv_forward(input, self.weight)
File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\conv.py", line 416, in _conv_forward
self.padding, self.dilation, self.groups)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [6, 1, 5, 5], but got 2-dimensional input of size [8, 562500] instead
I don’t understand this error, as it seems like the CNN should be able to output the 2d array. Do I need a densely connected layer, and could someone help me calculate the number of inputs that densely connect layer would need to take?