I’m having trouble using an input image with shape (batchsize, image_height, image_width). I’m getting “RuntimeError: Expected 4-dimensional input for 4-dimensional weight 32 1 3 3, but got 3-dimensional input of size [1, 256, 256] instead”. It’s probably something simple, but I can’t see it.
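As far as I can tell, Conv2d with in_channels=1 wants input shaped (N, 1, H, W), and a tensor missing one of those dimensions reproduces the error exactly. A minimal sketch (the 256x256 size is just taken from the error message):

    import torch
    from torch.nn import Conv2d

    conv = Conv2d(1, 32, kernel_size=3, padding=1)  # weight shape [32, 1, 3, 3]
    x = torch.rand(1, 256, 256)        # 3-D: no channel dimension
    # conv(x)                          # raises the RuntimeError above
    out = conv(x.unsqueeze(1))         # 4-D: (1, 1, 256, 256) -> works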
I deleted these two lines:

    image = np.reshape(image, (1, image_height, image_width))
    out = output.squeeze(1)

and changed this line:

    loss = loss_function(out, y)

to:

    loss = loss_function(output, y)
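If I understand the shapes correctly, the model outputs (N, 1, H, W) while the masks are (N, H, W), which is why the squeeze was there in the first place; BCEWithLogitsLoss requires its input and target to have the same size. A quick check with dummy shapes (not my real data):

    import torch
    from torch import nn

    loss_fn = nn.BCEWithLogitsLoss(reduction='none')
    output = torch.rand(4, 1, 256, 256)     # model output (N, 1, H, W)
    y = torch.rand(4, 256, 256)             # masks (N, H, W)
    loss = loss_fn(output.squeeze(1), y)    # OK: per-pixel loss of shape (4, 256, 256)
    # loss_fn(output, y)                    # ValueError: target size must match input size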
This is most of my code.
import numpy as np
import torch
from torch import nn, optim
from torch.nn import (Module, Sequential, Conv2d, ReLU, Dropout2d,
                      MaxPool2d, UpsamplingNearest2d)
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

k_size = 3
pad = 1

class UNetMini(Module):
    def __init__(self):
        super(UNetMini, self).__init__()
        self.block1 = Sequential(
            Conv2d(1, 32, kernel_size=k_size, padding=pad),
            ReLU(),
            Dropout2d(0.2),
            Conv2d(32, 32, kernel_size=k_size, padding=pad),
            ReLU(),
        )
        self.pool1 = MaxPool2d((2, 2))
        self.block2 = Sequential(
            Conv2d(32, 64, kernel_size=k_size, padding=pad),
            ReLU(),
            Dropout2d(0.2),
            Conv2d(64, 64, kernel_size=k_size, padding=pad),
            ReLU(),
        )
        self.pool2 = MaxPool2d((2, 2))
        self.block3 = Sequential(
            Conv2d(64, 128, kernel_size=k_size, padding=pad),
            ReLU(),
            Dropout2d(0.2),
            Conv2d(128, 128, kernel_size=k_size, padding=pad),
            ReLU()
        )
        self.up1 = UpsamplingNearest2d(scale_factor=2)
        self.block4 = Sequential(
            Conv2d(192, 64, kernel_size=k_size, padding=pad),
            ReLU(),
            Dropout2d(0.2),
            Conv2d(64, 64, kernel_size=k_size, padding=pad),
            ReLU()
        )
        self.up2 = UpsamplingNearest2d(scale_factor=2)
        self.block5 = Sequential(
            Conv2d(96, 32, kernel_size=k_size, padding=pad),
            ReLU(),
            Dropout2d(0.2),
            Conv2d(32, 32, kernel_size=k_size, padding=pad),
            ReLU()
        )
        self.conv2d = Conv2d(32, 1, kernel_size=1)  # 1x1 conv down to a single output channel

    def forward(self, x):
        out1 = self.block1(x)
        out_pool1 = self.pool1(out1)
        out2 = self.block2(out_pool1)
        out_pool2 = self.pool2(out2)
        out3 = self.block3(out_pool2)
        out_up1 = self.up1(out3)
        out4 = torch.cat((out_up1, out2), dim=1)  # skip connection: 128 + 64 = 192 channels
        out4 = self.block4(out4)
        out_up2 = self.up2(out4)
        out5 = torch.cat((out_up2, out1), dim=1)  # skip connection: 64 + 32 = 96 channels
        out5 = self.block5(out5)
        out = self.conv2d(out5)
        return out
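For what it’s worth, the network itself seems fine with a 4-D input; a quick dummy check (assuming 256x256 images, not part of my script):

    net = UNetMini()
    dummy = torch.rand(2, 1, 256, 256)   # (N, C, H, W)
    print(net(dummy).shape)              # torch.Size([2, 1, 256, 256])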
if rebuild_data:
    data = BuildData()
    data.make_training_data()

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
processed_images = np.load("/home/ryan/CrackProject/DataSets/Data/images.npy", allow_pickle=True)
processed_masks = np.load("/home/ryan/CrackProject/DataSets/Data/masks.npy", allow_pickle=True)
model = UNetMini().to(device)

# This builds the dataset
class FormsDataset(Dataset):
    def __init__(self, images, masks, transforms):
        self.images = images
        self.masks = masks
        self.transforms = transforms

    def __getitem__(self, idx):
        image = self.images[idx]
        image = image / 255  # scale pixel values to [0, 1]
        image = np.reshape(image, (1, image_height, image_width))  # add the channel dimension
        if self.transforms:
            image = self.transforms(image)
        mask = self.masks[idx]
        mask[mask > .7] = 1  # binarize the mask
        mask[mask <= .7] = 0
        if self.transforms:
            mask = self.transforms(mask)
        return image, mask

    def __len__(self):
        return len(self.images)
train_dataset = FormsDataset(processed_images, processed_masks, trans)
train_data_loader = DataLoader(train_dataset, batch_size=b_size, shuffle=False)

# Training loop
total_steps = len(train_data_loader)
print(f'Train dataset has {len(train_data_loader)} batches of size {b_size}')

# Loss and optimization
loss_function = nn.BCEWithLogitsLoss(reduction='none')
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# per-pixel weights for loss.backward(), since reduction='none' leaves the loss unreduced
initial = torch.rand([b_size, image_height, image_width]).to(device)
def fwd_pass(X, y):
    if train:
        model.zero_grad()  # clear the gradients from the last step, otherwise they accumulate
    output = model(X)
    out = output.squeeze(1)  # (N, 1, H, W) -> (N, H, W) so the output matches the mask shape
    loss = loss_function(out, y)
    acc = []
    if train:
        loss.backward(gradient=initial)  # backprop: derivative of the loss w.r.t. the parameters
        optimizer.step()
    return acc, loss, output
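My understanding is that with reduction='none' the loss stays a per-pixel tensor of shape (N, H, W), so backward() needs a gradient tensor of the same shape, which is what initial is for. A minimal sketch, separate from my script (torch.ones_like would make it a plain unweighted sum):

    out = torch.rand(4, 256, 256, requires_grad=True)
    y = torch.randint(0, 2, (4, 256, 256)).float()
    loss = nn.BCEWithLogitsLoss(reduction='none')(out, y)   # shape (4, 256, 256)
    # loss.backward()                     # fails: grad can only be implicitly created for scalar outputs
    loss.backward(torch.ones_like(loss))  # same as loss.sum().backward()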
def train():
    total_steps = len(train_data_loader)
    print(f"{epochs} epochs, {total_steps} total_steps per epoch")
    for epoch in tqdm(range(epochs), desc="Epochs"):
        for i, (images, masks) in enumerate(train_data_loader):
            images = images.type(torch.FloatTensor)
            images = images.to(device)
            masks = masks.type(torch.FloatTensor)
            masks = masks.to(device)
            acc, loss, output = fwd_pass(images, masks)

train()