Hi,
I’m writing a graph conv network and when I try to set a batch size greater than 1, I have some errors.
I’ve tried many different test, but it still the same : I can’t finish a epoch (sometimes error appears during the forward, and sometimes during the loss).
Sorry but I haven’t kept every solution I’ve tried, so here is the “running” one.
Some of the code are inspired by this one : GraphNeuralNet/main.py at master · praxidike97/GraphNeuralNet · GitHub
To not overload the post, here is only the important steps of my code (still long, sorry) :
import...
...
class GCNN_EL(InMemoryDataset):
def __init__(self, root, transform=None, pre_transform=None):
super(GCNN_EL, self).__init__(root, transform, pre_transform)
self.data, self.slices = torch.load(self.processed_paths[1])
....
class SimpleConv(MessagePassing):
def __init__(self, in_channels, out_channels, aggr="max", **kwargs):
super(SimpleConv, self).__init__(aggr=aggr, **kwargs)
self.lin = Linear(in_channels, out_channels, bias=True)
self.relu = torch.nn.ReLU()
self.reset_parameters()
def reset_parameters(self):
self.lin.reset_parameters()
def forward(self, x, edge_index, edge_attr):
return self.propagate(edge_index, x=self.lin(x.type('torch.FloatTensor').to(device)), edge_attr=edge_attr)
def message(self, x_i, x_j, edge_attr):
return self.relu(x_j)
class Net(torch.nn.Module):
def __init__(self,nb_convs,edge_nb_feats):
super(Net,self).__init__()
self.nb_convs=nb_convs
self.convs_mod=[]
conv_input_size=1
conv_total_size=1
for i in range(nb_convs):
self.convs_mod.append(SimpleConv(conv_input_size, 4).to(device))
conv_input_size = 4
conv_total_size += conv_input_size
self.lin1 = torch.nn.Linear(1525*4, 3000)
self.lin2 = torch.nn.Linear(3000, 1500)
self.lin3 = torch.nn.Linear(1500, 500)
self.lin4 = torch.nn.Linear(500, 1)
self.relu = torch.nn.ReLU()
def forward(self, data):
convs = []
for c in self.convs_mod:
nf = c(data.x, data.edge_index, data.edge_attr)
convs.append(nf)
flat = torch.cat([convs[-1]], dim=1).reshape(-1,)
padded = torch.nn.ConstantPad1d((0, 1525*4-flat.shape[0]), 0)(flat)
hid1 = self.relu(self.lin1(padded))
hid2 = self.relu(self.lin2(hid1))
hid3 = self.relu(self.lin3(hid2))
out = self.lin4(hid3)
return out
dataset = GCNN_EL('data')
dataset = dataset.shuffle()
# Only 32 for testing purpose
dataset = dataset[:32]
train_loader = DataLoader(dataset, batch_size=16)
model = Net(5, 1)
model = model.to(device)
crit = torch.nn.MSELoss().cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
for e in range(0, epoch):
model.train()
print("#################### Training - Epoch %d/%d"%(e+1, epoch))
for data in train_loader:
data = data.to(device)
optimizer.zero_grad()
output = model.forward(data)
loss = crit(output.float(), data.tag.float())
loss.backward()
optimizer.step()
running_loss += loss.item()
....
Inside the loop over the epochs, the Batch data is like :
Batch(batch=[8552], edge_attr=[8552, 1], edge_index=[2, 8552], x=[8552, 1], y=[16])
And the actual error is :
Traceback (most recent call last):
File "./regression_el.py", line 396, in <module>
output = model.forward(data)
File "./regression_el.py", line 282, in forward
nf = c(data.x, data.edge_index, data.edge_attr)
File "/opt/exp_soft/vo.gridcl.fr/software/torch/1.5.0/py37-cuda9.2/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "./regression_el.py", line 206, in forward
return self.propagate(edge_index, x=self.lin(x.type('torch.FloatTensor').to(device)), edge_attr=edge_attr)
File "/opt/exp_soft/vo.gridcl.fr/software/torch/1.5.0/py37-cuda9.2/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/opt/exp_soft/vo.gridcl.fr/software/torch/1.5.0/py37-cuda9.2/lib/python3.7/site-packages/torch/nn/modules/linear.py", line 87, in forward
return F.linear(input, self.weight, self.bias)
File "/opt/exp_soft/vo.gridcl.fr/software/torch/1.5.0/py37-cuda9.2/lib/python3.7/site-packages/torch/nn/functional.py", line 1610, in linear
ret = torch.addmm(bias, input, weight.t())
RuntimeError: size mismatch, m1: [8552 x 1], m2: [4 x 4] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:283
Please tell me if you need more details.
Thanks in advance if some has a lead.