I have a model in which one of the modules is:
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as functional
from torchvision import models


class LinearAttentionPriorBlock(nn.Module):
    def __init__(self, pathology, normalize_attention=True):
        super(LinearAttentionPriorBlock, self).__init__()
        self.normalize_attn = normalize_attention
        self.score_dir = "AvgAttentionScores"
        self.score = self.read_score(pathology)  # .to('cuda' if torch.cuda.is_available() else 'cpu')
        self.score.requires_grad = False

    def read_score(self, pathology):
        # Load the precomputed average attention scores for this pathology from disk
        arr = np.load(os.path.join(self.score_dir, pathology, "{:}.npy".format(pathology)))
        return torch.from_numpy(arr)

    def forward(self, global_f, score_shape):
        b_s, n_c, width, height = score_shape
        # Broadcast the stored score over the batch, then use it to gate the features
        score = self.score.unsqueeze(0).expand(b_s, n_c, width, height)
        normalized_scores = torch.sigmoid(score)
        global_f = torch.mul(normalized_scores, global_f)
        if self.normalize_attn:
            global_f = global_f.view(b_s, n_c, -1).sum(dim=2)  # batch_size x C
        else:
            global_f = functional.adaptive_avg_pool2d(global_f, (1, 1)).view(b_s, n_c)
        return global_f
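To make the intended behaviour concrete, here is a self-contained sketch of what forward() computes, with made-up tensors standing in for the real data (this assumes the saved score array has shape (C, H, W), which is what the expand call implies):

import torch

# Made-up shapes; the real values come from the ResNet feature maps
b_s, n_c, width, height = 8, 2048, 7, 7
score = torch.randn(n_c, width, height)          # stands in for self.score loaded from disk
global_f = torch.randn(b_s, n_c, width, height)

expanded = score.unsqueeze(0).expand(b_s, n_c, width, height)
weighted = torch.sigmoid(expanded) * global_f
pooled = weighted.view(b_s, n_c, -1).sum(dim=2)  # shape: (b_s, n_c)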
In the __init__ of the model I do the following (LABELS below is the list of pathology names):
def __init__(self, num_classes, use_attention=True, normalize_attention=True, is_pretrained=True, use_prior=False):
    super().__init__()
    self.use_prior = use_prior
    self.att_status = "w" if use_prior else "wo"
    self.use_attention = use_attention
    self.normalize_attention = normalize_attention
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.resnet = models.resnet50(pretrained=is_pretrained)
    fc_num_features = self.resnet.fc.in_features
    if not self.use_prior:
        self.projector_1 = ProjectorBlock(256, 2048)
        self.projector_2 = ProjectorBlock(512, 2048)
        self.projector_3 = ProjectorBlock(1024, 2048)
        self.attention_1 = LinearAttentionBlock(in_f=fc_num_features, normalize_attention=normalize_attention)
        self.attention_2 = LinearAttentionBlock(in_f=fc_num_features, normalize_attention=normalize_attention)
        self.attention_3 = LinearAttentionBlock(in_f=fc_num_features, normalize_attention=normalize_attention)
        self.classifier = nn.Linear(in_features=fc_num_features * 3, out_features=num_classes, bias=True)
    else:
        self.projector_3 = ProjectorBlock(1024, 2048)
        self.attention_3 = LinearAttentionBlock(in_f=fc_num_features, normalize_attention=normalize_attention)
        # One prior block and one binary classifier head per pathology label
        self.prior_layers = nn.ModuleList([LinearAttentionPriorBlock(LABELS[i],
                                                                     normalize_attention=normalize_attention)
                                           for i in range(len(LABELS))])
        self.classifiers = nn.ModuleList([nn.Linear(in_features=fc_num_features,
                                                    out_features=1,
                                                    bias=True).to(self.device) for _ in range(len(LABELS))])
This is supposed to move everything to the GPU properly, so when I do nn.DataParallel(model) I expect it to work, but I get the following error message:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1!
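For completeness, this is roughly how I construct and wrap the model (simplified; MyModel is just a placeholder for my actual model class):

model = MyModel(num_classes=len(LABELS), use_prior=True)  # MyModel is a placeholder name
model = nn.DataParallel(model)
model = model.to('cuda')

logits = model(images)  # this is where the device error is raised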
My guess is that I'm doing something wrong in the __init__ of my custom module, but I can't figure out what the issue is. Because of this I can only train on a single GPU.
I would appreciate some help.