Hi ptrblck,
I have run into a similar problem and would really appreciate your help.
I am trying to add an nn.Embedding layer to my transformer model, but x_input.grad is None, so the line x_grad = torch.sign(x_input.grad.data) in my create_augmented_data function fails with a NoneType error.
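In case it helps to narrow things down, here is a minimal standalone sketch (not part of my actual code, and I am not sure it is the same mechanism) that ends up with the same None gradient when a float input is cast to long for an nn.Embedding lookup:

import torch
import torch.nn as nn

# minimal sketch, separate from my training code
emb = nn.Embedding(10, 4)
x = (torch.rand(2, 3) * 9).requires_grad_()   # float "indices", leaf tensor
out = emb(x.long()).sum()                     # cast to long before the lookup
out.backward()
print(x.grad)                                 # prints None, same as in my model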
Here is my code for my model:
import torch
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, num_fc_layers, num_fc_units, dropout_rate, dim_num_heads):
        super().__init__()
        # embed_dim must be divisible by the number of attention heads
        if num_fc_units % dim_num_heads != 0:
            num_fc_units = num_fc_units // dim_num_heads * dim_num_heads
        embed_dim = num_fc_units
        self.embedding = nn.Embedding(998, embed_dim).requires_grad_(True)
        self.transformer_encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=dim_num_heads,      # number of heads in the multi-head attention
            dim_feedforward=embed_dim,
            dropout=0.1,
            activation='relu'
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.transformer_encoder_layer, num_layers=num_fc_layers
        )
        self.layers = nn.ModuleList()
        self.layers.append(nn.Linear(embed_dim, embed_dim))
        self.layers.append(nn.ReLU(True))
        self.layers.append(nn.Dropout(p=dropout_rate))
        for i in range(num_fc_layers):
            self.layers.append(nn.Linear(embed_dim, embed_dim))
            self.layers.append(nn.ReLU(True))
            self.layers.append(nn.Dropout(p=dropout_rate))
        self.output_layer = nn.Linear(embed_dim, 24)

    def forward(self, x):
        x = x.long()                    # cast to integer indices for the embedding lookup
        x = self.embedding(x)           # (batch, seq_len, embed_dim)
        x = torch.transpose(x, 0, 1)    # (seq_len, batch, embed_dim) for the encoder
        x = self.transformer_encoder(x)
        x = torch.transpose(x, 0, 1)
        x = torch.mean(x, dim=1)        # average over the sequence dimension
        for layer in self.layers:
            x = layer(x)
        x = self.output_layer(x)
        return x
Code for create_augmented_data function:
from torch.autograd import Variable

def create_augmented_data(x_train, y_train, eps, model):
    x_input = torch.from_numpy(x_train).float()
    y_true = torch.from_numpy(y_train).long()
    x_input = x_input.to(device)
    y_true = y_true.to(device)
    x_input = Variable(x_input, requires_grad=True)
    y_true = Variable(y_true)

    train_outputs = model(x_input)

    # debugging: check which parameters already have a gradient
    for name, param in model.named_parameters():
        if param.grad is None:
            print(name, 'is None')
        else:
            print('param {}: {}'.format(name, param.grad.abs().sum()))

    ad_loss = torch.nn.CrossEntropyLoss()
    loss_cal = ad_loss(train_outputs, y_true)
    loss_cal.backward(retain_graph=True)

    # FGSM-style perturbation: x_adv = x + eps * sign(dL/dx)
    x_grad = torch.sign(x_input.grad.data)   # fails here: x_input.grad is None
    x_adversarial = x_input + eps * x_grad
    x_aug = torch.cat([x_input, x_adversarial], dim=0)
    y_aug = torch.cat([y_true, y_true], dim=0)
    return x_aug, y_aug
Code for training:
model = MLP(num_fc_layers, num_fc_units, dropout_rate, num_heads).to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

for epoch in range(2):
    train_losses = []
    correct = 0
    total_predictions = 0
    model.train()
    for i, (x, y) in enumerate(train_loader):
        x_input, y_input = x.numpy(), y.numpy()
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()

        # build the adversarially augmented batch with the model in eval mode
        model.eval()
        x_aug, y_aug = create_augmented_data(x_input, y_input, eps=1.0, model=model)
        x_aug, y_aug = x_aug.to(device), y_aug.to(device)
        model.train()

        output = model(x_aug)
        loss = criterion(output, y_aug)
        train_losses.append(loss.item())   # keep the scalar value, not the graph
        loss.backward()
        optimizer.step()

        pred = output.max(1, keepdim=True)[1]
        correct += pred.eq(y_aug.view_as(pred)).sum().item()
        total_predictions += y_aug.size(0)
Attempts I made to solve the problem include:
- Added prints at each step to spot where the problem comes from; everything seems fine up to the line x_grad = torch.sign(x_input.grad.data), but print(x_input.grad) right before that line outputs None:
Variable: x_input.shape: torch.Size([32, 150])
Variable: y_true.shape: torch.Size([32])
train_outputs.shape: torch.Size([32, 24])
ad_loss: CrossEntropyLoss()
loss_cal: tensor(3.1756, grad_fn=<NllLossBackward0>)
x_input.grad None
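For completeness, the prints that produced the output above look roughly like this (reconstructed here, so the exact formatting may differ slightly):

print('Variable: x_input.shape:', x_input.shape)
print('Variable: y_true.shape:', y_true.shape)
print('train_outputs.shape:', train_outputs.shape)
print('ad_loss:', ad_loss)
print('loss_cal:', loss_cal)
print('x_input.grad', x_input.grad)   # this is the line that prints None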
- Printed .requires_grad for each model parameter after train_outputs = model(x_input); every parameter's requires_grad is True:
for name, param in model.named_parameters():
    print(name, param.requires_grad)
- Checked whether x_input is not a leaf tensor, or whether any other computation happens in between:
# these two lines are placed after loss_cal.backward(retain_graph=True)
print(x_input.grad_fn)   # returns None
print(x_input.is_leaf)   # returns True
- Checked .grad for all model parameters; every param.grad is None:
for name, param in model.named_parameters():
    if param.grad is None:
        print(name, 'is None')
    else:
        print('param {}: {}'.format(name, param.grad.abs().sum()))
Thank you so much for your help!