I’ve seen very similar posts, but I’m quite new to PyTorch and couldn’t figure this out. I’m getting a
RuntimeError: size mismatch, m1: [16 x 4096], m2: [1024 x 3] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:136
I know that this is occurring because the input features are not adding up but not sure how to fix this.
##################
# DATA FUNCTIONS #
##################
def channels_last_to_first(img):
img = np.swapaxes(img, 0,2)
img = np.swapaxes(img, 1,2)
return img
def preprocess_input(img, model):
# assume image is RGB
img = img[..., ::-1].astype('float32')
model_min = model.input_range[0] ; model_max = model.input_range[1]
img_min = float(np.min(img)) ; img_max = float(np.max(img))
img_range = img_max - img_min
model_range = model_max - model_min
img = (((img - img_min) * model_range) / img_range) + model_min
img[..., 0] -= model.mean[0]
img[..., 1] -= model.mean[1]
img[..., 2] -= model.mean[2]
img[..., 0] /= model.std[0]
img[..., 1] /= model.std[1]
img[..., 2] /= model.std[2]
return img
class CXRDataset(Dataset):
#
def __init__(self, imgfiles, labels, resize=None, preprocess=None, transform=None):
self.imgfiles = imgfiles
self.labels = labels
self.preprocess = preprocess
self.resize = resize
self.transform = transform
#
def __len__(self):
return len(self.imgfiles)
#
def __getitem__(self, i):
X = cv2.imread(self.imgfiles[i])
if self.resize: X = self.resize(X)
if self.transform: X = self.transform(image=X)['image']
X = channels_last_to_first(X)
y = np.asarray(self.labels[i])
if self.preprocess: X = self.preprocess(X)
return torch.from_numpy(X).type('torch.FloatTensor'), torch.from_numpy(y).type('torch.LongTensor')
##########
# SCRIPT #
##########
print (">>CNNs for CXRs<<")
torch.device('cpu') ; torch.backends.cudnn.benchmark = True
if not os.path.exists(save_dir): os.makedirs(save_dir)
cxr_df = pd.read_csv(data_splits)
split = 'split{}'.format(val_split)
train_df = cxr_df[cxr_df[split] == 'train'].reset_index(drop=True)
valid_df = cxr_df[cxr_df[split] == 'valid'].reset_index(drop=True)
print ('TRAIN : n = {}'.format(train_df.shape[0]))
print ('VALID : n = {}'.format(valid_df.shape[0]))
# Run model script
print ('Loading pretrained model [{}] ...'.format(model))
# exec(open(load_model).read())
m = pretrainedmodels.__dict__['densenet121'](num_classes=1000, pretrained='imagenet')
dim_feats = m.last_linear.in_features
print(dim_feats)
m.last_linear = nn.Sequential(nn.Dropout(_dropout_p), nn.Linear(dim_feats, _nb_classes)) # Must change dropout_p and nb_classes in load_model python file
m.train()
params = {'batch_size': _batch_size,
'shuffle': True,
'num_workers': 0} # num_workers is 0 when cpu and 4 when gpu
# Set up preprocessing function with model
pp = partial(preprocess_input, model=m)
print ('Setting up data loaders ...')
train_images = [os.path.join(data_dir, _) for _ in train_df.fileName]
train_set = CXRDataset(imgfiles=train_images,
labels=train_df.label,
preprocess=pp,
transform=train_aug)
train_gen = DataLoader(train_set, **params)
valid_images = [os.path.join(data_dir, _) for _ in valid_df.fileName]
valid_set = CXRDataset(imgfiles=valid_images,
labels=valid_df.label,
preprocess=pp)
valid_gen = DataLoader(valid_set, **params)
# Calculate inverse frequency weights based on training data distribution
weights = []
weights.append(1. / len(np.where(train_df.label==0)[0]))
weights.append(1. / len(np.where(train_df.label==1)[0]))
weights.append(1. / len(np.where(train_df.label==2)[0]))
weights = np.asarray(weights)
#weights *= args.nb_classes / float(np.sum(weights))
criterion = nn.CrossEntropyLoss(weight=torch.from_numpy(weights).type('torch.FloatTensor'))
optimizer = optim.Adam(m.parameters(),
lr=_initial_lr,
weight_decay=_weight_decay)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max',
factor=_annealing_factor,
patience=_lr_patience,
threshold=_min_delta,
threshold_mode='abs',
verbose=True)
best_auc = 0.
stopping = 0
num_epochs = 0 ; steps = 0 ; steps_per_epoch = _steps_per_epoch
start_time = datetime.datetime.now()
print ('TRAINING : START')
while num_epochs < _max_epochs:
running_loss = 0.
for i, data in enumerate(train_gen):
batch, labels = data
optimizer.zero_grad()
output = m(batch)
loss = criterion(output, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
steps += 1
if steps % _verbosity == 0: # print every 100 mini-batches
print('epoch {epoch}, batch {batch} : loss = {train_loss:.4f}'.format(epoch=str(num_epochs + 1).zfill(3), batch=steps, train_loss=running_loss / _verbosity))
running_loss = 0.
if steps % steps_per_epoch == 0:
# Validate
with torch.no_grad():
m = m.eval()
val_loss = 0.
val_y_pred = [] ; val_y_true = []
for i, data in enumerate(valid_gen):
batch, labels = data
batch = batch.view(batch.shape[0],-1)
output = m(batch)
loss = criterion(output, labels)
val_loss += loss.item()
val_y_pred.extend(output.cpu().numpy())
val_y_true.extend(labels.numpy())
val_y_pred = np.asarray(val_y_pred)
val_y_true = np.asarray(val_y_true)
val_loss /= float(len(valid_gen))
val_auc_binary = roc_auc_score(val_y_true, val_y_pred[:,-1])
print ('epoch {epoch} // VALIDATION : loss = {loss:.4f}, auc = {auc:.4f}'.format(epoch=str(num_epochs + 1).zfill(3), loss=val_loss, auc=val_auc_binary))
scheduler.step(val_auc_binary)
torch.save(m.state_dict(), os.path.join(save_dir, '{arch}_{epoch}-{val_loss:.4f}-{val_auc:.4f}.pth'.format(arch=m.upper(), epoch=str(num_epochs + 1).zfill(3), val_loss=val_loss, val_auc=val_auc_binary)))
m = m.train()
# Early stopping
if val_auc_binary > (best_auc + _min_delta):
best_auc = val_auc_binary
stopping = 0
else:
stopping += 1
if stopping >= _stop_patience:
num_epochs = _max_epochs
break
num_epochs += 1
steps = 0
print ('TRAINING : END')
print ('Training took {}\n'.format(datetime.datetime.now() - start_time))
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-11-aa0dc90d0d4d> in <module>
11 optimizer.zero_grad()
12
---> 13 output = m(batch)
14 loss = criterion(output, labels)
15 loss.backward()
~/ckk_covid/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
~/ckk_covid/lib/python3.5/site-packages/pretrainedmodels/models/torchvision_models.py in forward(self, input)
197 def forward(self, input):
198 x = self.features(input)
--> 199 x = self.logits(x)
200 return x
201
~/ckk_covid/lib/python3.5/site-packages/pretrainedmodels/models/torchvision_models.py in logits(self, features)
192 x = F.avg_pool2d(x, kernel_size=7, stride=1)
193 x = x.view(x.size(0), -1)
--> 194 x = self.last_linear(x)
195 return x
196
~/ckk_covid/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
~/ckk_covid/lib/python3.5/site-packages/torch/nn/modules/container.py in forward(self, input)
98 def forward(self, input):
99 for module in self:
--> 100 input = module(input)
101 return input
102
~/ckk_covid/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
~/ckk_covid/lib/python3.5/site-packages/torch/nn/modules/linear.py in forward(self, input)
85
86 def forward(self, input):
---> 87 return F.linear(input, self.weight, self.bias)
88
89 def extra_repr(self):
~/ckk_covid/lib/python3.5/site-packages/torch/nn/functional.py in linear(input, weight, bias)
1368 if input.dim() == 2 and bias is not None:
1369 # fused op is marginally faster
-> 1370 ret = torch.addmm(bias, input, weight.t())
1371 else:
1372 output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [16 x 4096], m2: [1024 x 3] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:136