Function SplineWeightingBackward returned invalid gradient

Hey :slight_smile:
I'm using PyTorch and PyTorch Geometric together and am working with Spline-CNNs, which use a B-spline-based convolutional operator.
I'm trying to implement a Generative Adversarial Network with Spline-CNNs that generates MNIST superpixel graphs.
During training I get this exception:
Function SplineWeightingBackward returned an invalid gradient at index 1 - got [9,1,128] but expected shape compatible with [9,15,128].
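For reference, the expected shape lines up with the generator's first convolution: with dim = 2 and kernel_size = 3, a SplineConv holds kernel_size ** dim = 9 spline basis weights, so its weight tensor is (9, in_channels, out_channels) = (9, noise_size, ngf * 4) = (9, 15, 128). A quick sanity check (a sketch; it assumes the layer stores its weight as (num_basis, in_channels, out_channels), as in torch_spline_conv):

# Sanity-check sketch of the generator's first SplineConv weight shape.
# Assumes the weight is stored as (num_basis, in_channels, out_channels)
# with num_basis = kernel_size ** dim = 3 ** 2 = 9, as in torch_spline_conv.
from torch_geometric.nn import SplineConv

conv = SplineConv(15, 128, dim=2, kernel_size=3)
print(conv.weight.shape)  # expected: torch.Size([9, 15, 128])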

Some of my code:

Imports:

import os, sys, time, datetime
import os.path as osp
import imageio
import pickle
import random
import itertools
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow, imsave
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision.utils import save_image

from torch_geometric.datasets import MNISTSuperpixels
import torch_geometric.transforms as T
from torch_geometric.data import DataLoader
from torch_geometric.data import Data
from torch_geometric.utils import normalized_cut
from torch_geometric.nn import (SplineConv, voxel_grid, graclus, max_pool, max_pool_x, global_mean_pool)


Parameters:

batch_size = 1
learning_rate = 0.001
epochs = 50
context_size = 10
noise_size = 15
ngf = 32 # Size of feature maps in the generator
ndf = 32 # Size of feature maps in the discriminator
num_features = 1 # Number of node features in MNIST-Superpixel
num_nodes = 75 # Number of nodes per graph in MNIST-Superpixel
n_critic = 1 # Generator update every n_critic discriminator steps (used below; value assumed)
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'MNIST_Graclus')
transform = T.Cartesian()
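(For reference: T.Cartesian() stores the relative Cartesian coordinates of connected nodes as 2-dimensional edge_attr, which is what the dim = 2 SplineConv layers below consume as pseudo-coordinates.)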

Dataset and Dataloader:

train_dataset = MNISTSuperpixels(path, True, transform=transform)

test_dataset = MNISTSuperpixels(path, False, transform=transform)

train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True, drop_last = True)

test_loader = DataLoader(test_dataset, batch_size = batch_size)

d = train_dataset
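The discriminator below uses normalized_cut_2d, which the snippet doesn't define; for completeness, here is the helper as it appears in the PyTorch Geometric MNIST example this code follows:

# Helper for the discriminator's graclus pooling (from the PyTorch Geometric
# MNIST example; not included in the original post):
def normalized_cut_2d(edge_index, pos):
    row, col = edge_index
    edge_attr = torch.norm(pos[row] - pos[col], p=2, dim=1)
    return normalized_cut(edge_index, edge_attr, num_nodes=pos.size(0))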

Discriminator:

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.conv1 = SplineConv(d.num_features, ndf, dim = 2, kernel_size = 5)
        self.conv2 = SplineConv(ndf, ndf*2, dim = 2, kernel_size = 5)
        self.fc1 = nn.Linear(ndf * 2, ndf*4)
        self.fc2 = nn.Linear(ndf*4, 1)


    def forward(self, data): 
        data.x = F.elu(self.conv1(data.x, data.edge_index, data.edge_attr))
        weight = normalized_cut_2d(data.edge_index, data.pos)
        cluster = graclus(data.edge_index, weight, data.x.size(0))
        data.edge_attr = None
        data = max_pool(cluster, data, transform = transform)

        data.x = F.elu(self.conv2(data.x, data.edge_index, data.edge_attr))
        weight = normalized_cut_2d(data.edge_index, data.pos)
        cluster = graclus(data.edge_index, weight, data.x.size(0))
        x, batch = max_pool_x(cluster, data.x, data.batch)
        
        x = global_mean_pool(x, batch)
        x = F.elu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        return torch.sigmoid(self.fc2(x))  

Generator:

class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.conv1 = SplineConv(noise_size, ngf * 4, root_weight = False, dim = 2, kernel_size = 3)
        self.conv2 = SplineConv(ngf * 4, ngf * 2, root_weight = False, dim = 2, kernel_size = 3)
        self.conv3 = SplineConv(ngf * 2, ngf, root_weight = False, dim = 2, kernel_size = 3)
        self.conv4 = SplineConv(ngf, d.num_features, root_weight = False, dim = 2, kernel_size = 3)

    def forward(self, data):
        data.x = self.conv1(data.x, data.edge_index, data.edge_attr)
        data.x = F.elu(data.x)
        data.x = F.elu(self.conv2(data.x, data.edge_index, data.edge_attr))
        data.x = F.elu(self.conv3(data.x, data.edge_index, data.edge_attr))
        data.x = torch.tanh(self.conv4(data.x, data.edge_index, data.edge_attr))
        return data

Generate “noisegraph”:

def generate_noisegraph(num_nodes, num_edges, label):
    graph = nx.gnm_random_graph(num_nodes, num_edges)
    edge_attr = torch.FloatTensor(num_edges, 2).uniform_(0, 1)
    pos = torch.FloatTensor(num_nodes, 2).uniform_(0, 10)
    x = torch.empty(num_nodes, 1)  # uninitialized; only the first third is filled below
    y = torch.empty(1)

    # Copy the random graph's edge list into a (2, num_edges) index tensor
    edge_index = torch.tensor(list(graph.edges), dtype=torch.long).t().contiguous()

    # Fill the first third of the node features with uniform noise
    for i in range(num_nodes // 3):
        x[i] = random.uniform(0, 1)

    y[0] = label

    batch = torch.zeros(num_nodes, dtype=torch.long)

    graph = Data(x = x, edge_index = edge_index, edge_attr = edge_attr, y = y, pos = pos, batch = batch)
    return graph
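The training loop below also calls get_sample_image, which isn't shown in the post. For completeness, a minimal hypothetical stand-in (the grid rendering here is my assumption, not the original helper) could scatter the generated node intensities onto a 28x28 grid:

# Hypothetical stand-in for the omitted get_sample_image helper: renders one
# generated graph as a crude 28x28 grayscale image by writing each node's
# feature value at its (scaled) position. Not the original helper;
# noise_size is kept only to match the call site and is unused here.
def get_sample_image(G, noise_size, grid=28):
    z = generate_noisegraph(num_nodes, 3 * num_nodes, 0)
    with torch.no_grad():
        out = G(z)
    img = np.zeros((grid, grid), dtype=np.float32)
    xs = (out.pos[:, 0] / out.pos[:, 0].max() * (grid - 1)).long()
    ys = (out.pos[:, 1] / out.pos[:, 1].max() * (grid - 1)).long()
    img[ys.numpy(), xs.numpy()] = out.x.squeeze(-1).numpy()
    return img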

Training code:

D = Discriminator()
G = Generator()

D.train()
G.train()

criterion = nn.BCELoss()

D_opt = torch.optim.Adam(D.parameters(), lr = learning_rate, betas=(0.5, 0.999))
G_opt = torch.optim.Adam(G.parameters(), lr= learning_rate, betas=(0.5, 0.999))


step = 0

D_labels = torch.ones([batch_size, 1])  # .to(device) # discriminator labels for real samples
D_fakes = torch.zeros([batch_size, 1])  # .to(device) # discriminator labels for fake samples

# results save folder
if not os.path.isdir('Spline-CGAN_MNIST_Ergebnisse'):
    os.mkdir('Spline-CGAN_MNIST_Ergebnisse')

train_hist = {}
train_hist['D_losses'] = []
train_hist['G_losses'] = []
train_hist['per_epoch_ptimes'] = []
train_hist['total_ptime'] = []


print('training start!')
start_time = time.time()

for epoch in range(epochs):
    D_losses = []
    G_losses = []
    epoch_start_time = time.time()
    for idx, batch in enumerate(train_loader):
        # Training Discriminator
        x = batch
        x_outputs = D(x)
        D_x_loss = criterion(x_outputs, D_labels)  
        label = batch.y
        z = generate_noisegraph(15, 60, label) 
        g_out = G(z)
        z_outputs = D(g_out)
        D_z_loss = criterion(z_outputs, D_fakes)
        D_loss = D_x_loss + D_z_loss
        
        D.zero_grad()
        D_loss.backward()
        D_opt.step()
        
        D_losses.append(D_loss.item())

        if step % n_critic == 0:
            # Training Generator
            z = generate_noisegraph(15, 60, label) 
            g_out = G(z)
            z_outputs = D(g_out)
            G_loss = criterion(z_outputs, D_labels)

            G.zero_grad()
            G_loss.backward()
            G_opt.step()

            G_losses.append(G_loss.item())
        step += 1
    G.eval()
    img = get_sample_image(G, noise_size)
    imsave('Spline-CGAN_MNIST_Ergebnisse/epoch_{}.png'.format(str(epoch)), img, cmap='gray')
    G.train()  
    epoch_end_time = time.time()
    per_epoch_ptime = epoch_end_time - epoch_start_time
    print('[%d/%d] - Ptime: %.2f, D Loss: %.3f, G Loss: %.3f' % (epoch + 1, epochs, per_epoch_ptime, D_loss.item(), G_loss.item()))
    train_hist['D_losses'].append(torch.mean(torch.FloatTensor(D_losses)))
    train_hist['G_losses'].append(torch.mean(torch.FloatTensor(G_losses)))
    train_hist['per_epoch_ptimes'].append(per_epoch_ptime)

end_time = time.time()
total_ptime = end_time - start_time
train_hist['total_ptime'].append(total_ptime)

print("Avg one epoch ptime: %.2f, total %d epochs ptime: %.2f" % (torch.mean(torch.FloatTensor(train_hist['per_epoch_ptimes'])), epochs, total_ptime))
print("Training finish!... save training results")
torch.save(G.state_dict(), "Spline-CGAN_MNIST_Ergebnisse/generator_param.pkl")
torch.save(D.state_dict(), "Spline-CGAN_MNIST_Ergebnisse/discriminator_param.pkl")
with open('Spline-CGAN_MNIST_Ergebnisse/train_hist.pkl', 'wb') as f:
    pickle.dump(train_hist, f)

show_train_hist(train_hist, save=True, path='Spline-CGAN_MNIST_Ergebnisse/MNIST_Spline-CGAN_train_hist.png')
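show_train_hist is likewise omitted from the post; a minimal hypothetical version that just plots the two loss curves could look like this:

# Hypothetical stand-in for the omitted show_train_hist helper: plots the
# per-epoch discriminator and generator losses. Not the original helper.
def show_train_hist(hist, save=False, path='train_hist.png'):
    plt.figure()
    plt.plot(hist['D_losses'], label='D_loss')
    plt.plot(hist['G_losses'], label='G_loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    if save:
        plt.savefig(path)
    plt.show()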

Any ideas? :slight_smile: