Good afternoon,
I’m a newbie in PyTorch, building a binary classification model based on 2 inputs: images and numeric data.
Here’s the custom dataset code and the model as well:
class FaceLandmarksDataset(Dataset):
"""Face Landmarks dataset."""
def __init__(self, data_frame, root_dir, transform=None):
"""
Args:
csv_file (string): Path to the csv file with annotations.
root_dir (string): Directory with all the images.
transform (callable, optional): Optional transform to be applied
on a sample.
"""
self.data_frame = data_frame
self.root_dir = root_dir
self.transform = transform
def __len__(self):
return len(self.data_frame)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx, 2])
image = color.rgba2rgb(io.imread(img_name))
landmarks = self.data_frame.iloc[idx, 3:]
landmarks = np.array([landmarks]).astype('float').reshape(-1, 2)
labels = self.data_frame.iloc[idx, 1].reshape(1)
# labels = labels.squeeze()
sample = {'image': image, 'landmarks': landmarks, 'labels': labels}
if self.transform:
sample = self.transform(sample)
return sample
class Rescale(object):
"""Rescale the image in a sample to a given size.
Args:
output_size (tuple or int): Desired output size. If tuple, output is
matched to output_size. If int, smaller of image edges is matched
to output_size keeping aspect ratio the same.
"""
def __init__(self, output_size):
assert isinstance(output_size, (int, tuple))
self.output_size = output_size
def __call__(self, sample):
image, landmarks, labels = sample['image'], sample['landmarks'], sample['labels']
h, w = image.shape[:2]
if isinstance(self.output_size, int):
if h > w:
new_h, new_w = self.output_size * h / w, self.output_size
else:
new_h, new_w = self.output_size, self.output_size * w / h
else:
new_h, new_w = self.output_size
new_h, new_w = int(new_h), int(new_w)
img = transform.resize(image, (new_h, new_w))
# h and w are swapped for landmarks because for images,
# x and y axes are axis 1 and 0 respectively
landmarks = landmarks * [new_w / w, new_h / h]
return {'image': img, 'landmarks': landmarks, 'labels': labels}
class ToTensor(object):
"""Convert ndarrays in sample to Tensors."""
def __call__(self, sample):
image, landmarks, labels = sample['image'], sample['landmarks'], sample['labels']
# swap color axis because
# numpy image: H x W x C
# torch image: C x H x W
image = image.transpose((2, 0, 1))
img_torch = torch.from_numpy(image)
landmarks_torch = torch.from_numpy(landmarks.flatten())
labels_torch = torch.from_numpy(labels).type(torch.float64)
return {'image': img_torch,
'landmarks': landmarks_torch,
'labels': labels_torch}
Here’s a printout of data size and dtypes:
Images - torch.Size([3, 224, 224]) torch.float64, numeric - torch.Size([96]) torch.float64 and labels - torch.Size([1]) torch.float64.
class MixedNetwork(nn.Module):
def __init__(self):
super(MixedNetwork, self).__init__()
image_modules = list(models.resnet50().children())[:-1]
self.image_features = nn.Sequential(*image_modules)
self.landmark_features = nn.Sequential(
nn.Linear(in_features=96, out_features=192,bias=False),
nn.ReLU(inplace=True),
nn.Dropout(p=0.25),
nn.Linear(in_features=192,out_features=1000,bias=False),
nn.ReLU(inplace=True),
nn.Dropout(p=0.25))
self.combined_features = nn.Sequential(
nn.Linear(3048, 512),
nn.ReLU(),
nn.Linear(512, 32),
nn.ReLU(),
nn.Linear(32,1))
def forward(self, image, landmarks):
a = self.image_features(image)
print("shape of a", a.shape)
b = self.landmark_features(landmarks)
print("shape of b", b.shape)
x = torch.cat((a.view(a.size(0), -1), b.view(b.size(0), -1)), dim=1)
x = self.combined_features(x)
x = F.sigmoid(x)
return x
class Trainer():
def __init__(self,criterion = None,optimizer = None,schedular = None):
self.criterion = criterion
self.optimizer = optimizer
self.schedular = schedular
def train_batch_loop(self,model,train_dataloader):
train_loss = 0.0
train_acc = 0.0
for sample in train_dataloader:
# move the data to CPU
images = sample["image"].type(torch.cuda.DoubleTensor).to(device)
landmarks = sample["landmarks"].type(torch.cuda.DoubleTensor).to(device)
labels = sample["labels"].type(torch.cuda.DoubleTensor).to(device)
self.optimizer.zero_grad()
logits = model(images, landmarks)
labels=labels.to(torch.int64)
loss = self.criterion(logits, labels)
loss.backward()
self.optimizer.step()
train_loss += loss.item()
train_acc += accuracy(logits, labels)
return train_loss / len(train_dataloader), train_acc / len(train_dataloader)
def valid_batch_loop(self,model,val_dataloader):
valid_loss = 0.0
valid_acc = 0.0
for sample in val_dataloader:
# move the data to CPU
images = sample["image"].type(torch.cuda.DoubleTensor).to(device)
landmarks = sample["landmarks"].type(torch.cuda.DoubleTensor).to(device)
labels = sample["labels"].type(torch.cuda.DoubleTensor).to(device)
self.optimizer.zero_grad()
logits = model(images, landmarks)
loss = self.criterion(logits,labels)
valid_loss += loss.item()
valid_acc += accuracy(logits,labels)
return valid_loss / len(val_dataloader), valid_acc / len(val_dataloader)
def fit(self,model,trainloader,validloader,epochs):
valid_min_loss = np.Inf
for i in range(epochs):
model.train() # this turn on dropout
avg_train_loss, avg_train_acc = self.train_batch_loop(model,trainloader) ###
model.eval() # this turns off the dropout lapyer and batch norm
avg_valid_loss, avg_valid_acc = self.valid_batch_loop(model,validloader) ###
if avg_valid_loss <= valid_min_loss :
print("Valid_loss decreased {} --> {}".format(valid_min_loss,avg_valid_loss))
torch.save(model.state_dict(),'ColabCatPainModel.pt')
valid_min_loss = avg_valid_loss
print("Epoch : {} Train Loss : {:.6f} Train Acc : {:.6f}".format(i+1, avg_train_loss, avg_train_acc))
print("Epoch : {} Valid Loss : {:.6f} Valid Acc : {:.6f}".format(i+1, avg_valid_loss, avg_valid_acc))
model = MixedNetwork()
model.double()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters() , lr = 0.001)
epochs = 25
batch_size = 1
trainer = Trainer(criterion,optimizer)
trainer.fit(model, train_dataloader, val_dataloader,epochs = epochs)
Now, I keep getting the error message:
RuntimeError: Found dtype Long but expected Double
I don’t understand where this Long is coming from - I’ve converted everything to Double in Trainer class. Could you, please, help?