Below are code snippets that use BatchNorm1d.
Even though I set the random seed and put the model in eval() mode
(or ran it under with torch.no_grad():),
the output varied on every run. But when I removed the BatchNorm1d layers or the activation function F.relu(),
the output stayed the same. This is very strange behavior; has anyone encountered it or already solved it?
def set_seed(random_seed):
    """Seed the random number generators used by this script.

    Passes ``random_seed`` to NumPy, Python's ``random`` module and the
    PyTorch seeding functions (``manual_seed``, ``cuda.manual_seed`` and
    ``cuda.manual_seed_all``), then turns the cuDNN ``deterministic`` flag
    on and the cuDNN ``benchmark`` flag off.

    Args:
        random_seed: Seed value handed to every seeding function.
    """
    seeders = (
        np.random.seed,
        random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    # Invoke the seeding functions one after another, all with the same seed.
    for seed_fn in seeders:
        seed_fn(random_seed)

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Planetoid "Cora" dataset rooted at data/Planetoid, with the
# NormalizeFeatures transform attached.
dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

# The rest of the script works on the dataset's first entry only.
data = dataset[0]
class GCN(torch.nn.Module):
    """Stack of ``GCNConv`` layers, each followed by ``BatchNorm1d``.

    The input width is read from the module-level ``dataset`` and the number
    of output classes from the labels in the module-level ``data``, so both
    must exist before a model is constructed.

    Args:
        hidden_channels: Width of every hidden layer.
        num_layer: Total number of ``GCNConv`` layers (input layer, hidden
            layers and output layer). Must be at least 2.

    Raises:
        ValueError: If ``num_layer`` is smaller than 2.
    """

    def __init__(self, hidden_channels, num_layer=10):
        super().__init__()
        # With fewer than two layers the loop in forward() would never reach
        # the classification layer, so reject that configuration up front.
        if num_layer < 2:
            raise ValueError(f"num_layer must be at least 2, got {num_layer}")

        self.num_layer = num_layer
        self.cached = True

        num_classes = len(data.y.unique())
        # widths[k] -> widths[k + 1] are the in/out sizes of layer k.
        widths = (
            [dataset.num_features]
            + [hidden_channels] * (num_layer - 1)
            + [num_classes]
        )

        # All convolutions are created before any batch-norm layer.
        self.conv_list = ModuleList([
            GCNConv(w_in, w_out, cached=self.cached, bias=False)
            for w_in, w_out in zip(widths, widths[1:])
        ])
        self.bn_list = ModuleList([
            nn.BatchNorm1d(conv.out_channels, momentum=0.3)
            for conv in self.conv_list
        ])

    def forward(self, x, edge_index):
        """Return one row of class scores per row of ``x``.

        Args:
            x: Node feature tensor passed to the first ``GCNConv``.
            edge_index: Edge index tensor passed to every ``GCNConv``.

        Returns:
            The batch-normalised output of the last ``GCNConv`` layer.
        """
        last = self.num_layer - 1
        for i, (conv, bn) in enumerate(zip(self.conv_list, self.bn_list)):
            # Dropout is applied only to the inputs of the first and last layer.
            if i == 0 or i == last:
                x = F.dropout(x, 0.6, training=self.training)
            x = bn(conv(x, edge_index))
            # The last layer's output is returned as class scores (logits),
            # so it is not rectified: a ReLU here would clamp every negative
            # score to zero and create ties between classes.
            if i != last:
                x = F.relu(x)
        return x
# Seed first so that the model's parameter initialisation below is covered.
set_seed(10)

criterion = torch.nn.CrossEntropyLoss()

model = GCN(hidden_channels=16)
model = model.to(device)

# Adam over all model parameters, with weight decay.
optimizer = torch.optim.Adam(model.parameters(), weight_decay=5e-4, lr=0.01)
def train():
    """Run one optimisation step on the training nodes.

    Uses the module-level ``model``, ``optimizer``, ``criterion``, ``data``
    and ``device``.

    Returns:
        The loss value produced by ``criterion`` for this step.
    """
    model.train()
    optimizer.zero_grad()  # Drop gradients left over from the previous step.

    features = data.x.to(device)
    edges = data.edge_index.to(device)
    out = model(features, edges)  # One forward pass over all of data.x.

    # Only the entries selected by train_mask contribute to the loss.
    train_mask = data.train_mask
    predictions = out[train_mask].to(device)
    targets = data.y[train_mask].to(device)
    loss = criterion(predictions, targets)

    loss.backward()   # Compute gradients.
    optimizer.step()  # Apply the parameter update.
    return loss
def test():
    """Evaluate the model on the test nodes.

    Uses the module-level ``model``, ``data`` and ``device``. The model is
    switched to eval mode and is left in that mode when the function returns.

    Returns:
        Fraction of entries selected by ``data.test_mask`` whose predicted
        class equals the label in ``data.y``.
    """
    with torch.no_grad():
        model.eval()
        features = data.x.to(device)
        edges = data.edge_index.to(device)
        out = model(features, edges)

        # Predicted class = index of the largest score in each row.
        pred = out.argmax(dim=1)

        test_mask = data.test_mask
        hits = pred[test_mask].to(device) == data.y[test_mask].to(device)

        num_correct = int(hits.sum())
        num_test = int(test_mask.sum())
        return num_correct / num_test
# Train for 200 epochs, reporting the loss after every step.
for epoch in range(1, 200 + 1):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

# Evaluate once, after training has finished.
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')