My code runs, but it takes too much time and the loss value is too high. I found out that the DataLoader isn't using the batch size I set; it seems to pass the whole dataset to the model at once. There are 3607 samples (one image and one mask each), and I want the batch size to be 1. How can I fix this?
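A minimal way to see what the loader actually yields (just a sketch; it assumes the BasicDataset class shown below, the paths are placeholders, and H/W stand for whatever size the images have) is to pull one batch and print the shapes:

from torch.utils.data import DataLoader

ds = BasicDataset('/path/to/MRI/', '/path/to/mask/')   # placeholder paths
loader = DataLoader(ds, batch_size=1)
mri, target = next(iter(loader))
print(mri.shape)              # expected with batch_size=1: torch.Size([1, 3, H, W])
print(target["masks"].shape)  # expected: torch.Size([1, 1, H, W])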
import os
import re
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset

class BasicDataset(Dataset):
    def __init__(self, imgs_dir, masks_dir):
        self.imgs_dir = imgs_dir
        self.masks_dir = masks_dir
        # collect the file names in each directory
        self.mriids = next(os.walk(self.imgs_dir))[2]
        self.maskids = next(os.walk(self.masks_dir))[2]

        # natural sort so that e.g. img_2 comes before img_10
        def atoi(text):
            return int(text) if text.isdigit() else text

        def natural_keys(text):
            return [atoi(c) for c in re.split(r'(\d+)', text)]

        self.mriids = sorted(self.mriids, key=natural_keys)
        self.maskids = sorted(self.maskids, key=natural_keys)

    def __len__(self):
        return len(self.mriids)

    def __getitem__(self, idx):
        mriidx = self.mriids[idx]    # image file name
        maskidx = self.maskids[idx]  # mask file name
        mask_file = os.path.join(self.masks_dir, maskidx)
        img_file = os.path.join(self.imgs_dir, mriidx)

        img = Image.open(img_file).convert("RGB")
        mask = Image.open(mask_file).convert("L")

        mask = np.array(mask)
        img = np.array(img)
        mask = np.expand_dims(mask, axis=2)

        # HWC -> CHW
        img = np.transpose(img, (2, 0, 1))
        mask = np.transpose(mask, (2, 0, 1))

        obj_ids = np.unique(mask)
        obj_ids = obj_ids[1:]  # first id is the background, drop it
        num_objs = len(obj_ids)

        labels = torch.ones((num_objs,), dtype=torch.int64)
        mask = torch.as_tensor(mask, dtype=torch.uint8)
        image_id = torch.tensor([idx])

        target = {}
        target["labels"] = labels
        target["masks"] = mask
        target["image_id"] = image_id

        return img, target
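For reference, a single sample can also be checked straight from the dataset, before any DataLoader is involved (again a sketch with placeholder paths; the shapes follow from the __getitem__ above):

ds = BasicDataset('/path/to/MRI/', '/path/to/mask/')  # placeholder paths
img, target = ds[0]
print(type(img), img.shape)        # numpy.ndarray, (3, H, W)
print(target["masks"].shape)       # torch.Size([1, H, W])
print(int(target["masks"].max()))  # worth checking whether mask values are 0/1 or 0/255

If the shapes here look right but the batch coming out of the loader is larger than [1, 3, H, W], the problem is in the loading/batching step rather than in __getitem__.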
import time
from torch import nn, optim
from torch.utils.data import DataLoader

epochs = 100
batch_size = 1
lr = 0.00001
momentum = 0.99

optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
loss_func = nn.BCEWithLogitsLoss().to(device)

gen = BasicDataset('/home/intern/Desktop/YH/Brain_MRI/BrainMRI_train/MRI/MRI/',
                   '/home/intern/Desktop/YH/Brain_MRI/BrainMRI_train/mask/mask/')
train_loader = DataLoader(gen, batch_size=batch_size)

total_batch = len(gen)  # this is the number of samples, not the number of batches
print(total_batch)

model.train()
print("start training")

for epoch in range(epochs):
    t0 = time.time()
    for mri, true_mask in train_loader:
        mri = mri.type(torch.FloatTensor)
        true_mask = true_mask["masks"]
        true_mask = true_mask.type(torch.FloatTensor)

        mri = mri.to(device)
        true_mask = true_mask.to(device)

        pred_mask = model(mri)
        loss = loss_func(pred_mask, true_mask)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    t1 = time.time()
    print('[Epoch:{}], loss = {}, time = {}'.format(epoch + 1, loss, t1 - t0))

print('training Finished!')
Output (I stopped training manually with a keyboard interrupt):

3607
start training
[Epoch:1], loss = nan, time = 141.68572974205017
[Epoch:2], loss = nan, time = 143.46247911453247
[Epoch:3], loss = nan, time = 143.64162826538086
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-72-0b9f410e970f> in <module>()
19 # true_mask = true_mask.squeeze(1)
20
---> 21 mri = mri.to(device)
22 true_mask = true_mask.to(device)
23
KeyboardInterrupt: