I am new to PyTorch and I am afraid I am misunderstanding a concept. I have a binary classifier (dog vs. cat) for 64x64 images.
class Net(nn.Module):
    """Small CNN that maps 3-channel images to 2 output scores (dog vs. cat).

    Two conv + ReLU + max-pool stages are followed by three fully connected
    layers. ``fc1`` expects 100 * 12 * 12 features per image, which is what
    the conv stack produces for 64x64 inputs (64 -> 60 -> 30 -> 24 -> 12).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Stage 1: 3 -> 50 channels, 5x5 kernel, then 2x2 max-pool with stride 2.
        self.conv1 = nn.Conv2d(3, 50, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        # Stage 2: 50 -> 100 channels, 7x7 kernel, then 2x2 max-pool with stride 2.
        self.conv2 = nn.Conv2d(50, 100, 7)
        self.pool2 = nn.MaxPool2d(2,2)
        # Classifier head: 14400 -> 120 -> 100 -> 2.
        self.fc1 = nn.Linear(100 * 12 * 12, 120)
        self.fc2 = nn.Linear(120, 100)
        self.fc3 = nn.Linear(100, 2)

    def forward(self, x):
        """Run the conv stack and the classifier head on a batch ``x``."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # Debug print of the feature-map shape. The trailing
        # "<---- problem is HERE" is the poster's marker, not intended code.
        print(x.shape) <---- problem is HERE
        # NOTE(review): the first view() argument hard-codes a batch size of
        # 100, so any other batch size fails with
        # "RuntimeError: shape '[100, 14400]' is invalid for input of size ...".
        # Using -1 for that dimension would let view() infer the batch size.
        x = x.view(100, 100 * 12 * 12)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: raw scores for the 2 classes.
        x = self.fc3(x)
        return x
When I train the network (following a tutorial), I get a size-mismatch error:
RuntimeError: shape '[100, 14400]' is invalid for input of size 273600
When I try to call:
# Build the network and run one forward pass over the whole of train_data.
# NOTE(review): presumably train_data has been converted to a float tensor of
# shape (N, 3, 64, 64) before this call — confirm against the loading code.
net = Net()
output = net(train_data)
Where train_data is an array of N (here 19) images:
# Load at most the first 100 training images as [RGB image array, label array]
# pairs, then shuffle the pairs in place.
train_data = []
for i, addr in enumerate(train_addrs[:100]):
    img = cv2.imread(addr)
    if img is None:
        # cv2.imread returns None (it does not raise) when the file is missing
        # or cannot be decoded; fail here with the offending path instead of
        # with an opaque assertion inside cv2.resize.
        raise OSError(f"could not read image: {addr}")
    # Resize to (IMAGE_SIZE, IMAGE_SIZE).
    img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_CUBIC)
    # cv2 loads images as BGR; convert to RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    train_data.append([np.array(img), np.array(train_labels[i])])
shuffle(train_data)
In other words: my network expects one image at a time. Why?
Hello,
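I think the problem occurs during flattening: x.view(100, 100*12*12) only works with a batch of exactly 100 images. Your batch has 19 images (19 × 14400 = 273600 elements), so it cannot be reshaped to [100, 14400].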
class Net(nn.Module):
    """CNN producing 2 output scores per image, for any batch size.

    Two conv + ReLU + max-pool stages are followed by three fully connected
    layers. The feature maps are flattened per sample with ``nn.Flatten``,
    so the batch dimension is never hard-coded. ``fc1`` expects
    100 * 12 * 12 features per image, which is what 64x64 inputs yield.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=50, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=50, out_channels=100, kernel_size=7)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Fully connected classifier head.
        flat_features = 100 * 12 * 12
        self.fc1 = nn.Linear(flat_features, 120)
        self.fc2 = nn.Linear(120, 100)
        self.fc3 = nn.Linear(100, 2)
        # Collapses every dimension except the batch dimension.
        self.flatten = nn.Flatten()

    def forward(self, x):
        """Return a (batch, 2) tensor of class scores for the batch ``x``."""
        stage1 = self.pool1(F.relu(self.conv1(x)))
        stage2 = self.pool2(F.relu(self.conv2(stage1)))
        # Debug output: shape of the feature maps before flattening.
        print(stage2.shape)
        # Equivalent to stage2.view(-1, 100 * 12 * 12): the batch size is
        # taken from the input rather than fixed in the code.
        flat = self.flatten(stage2)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
# Smoke test: push a random batch of 200 RGB 64x64 images through the network.
N = 200
C = 3
H = W = 64
train_data = torch.rand((N, C, H, W))
net = Net()
output = net(train_data)
I prefer using nn.Flatten for this purpose. To fix the spatial output size that you pass to the fully connected layers, regardless of the input image size, you can use nn.AdaptiveMaxPool2d.