I cannot reproduce it on Google Colab using the following script:
import torch
from torch import nn
import time
from datetime import datetime
import numpy as np
class ResidualLearningNet(nn.Module):
    """Four stacked residual stages operating on single-channel images.

    Every stage applies three sub-blocks in sequence and adds the stage
    input back onto the result:

    1. 11x11 convolution (1 -> 64 channels), batch norm, ReLU
    2. 1x1 convolution (64 -> 32 channels), batch norm, ReLU
    3. 7x7 convolution (32 -> 1 channel), no normalisation or activation

    All convolutions use the default stride and ``padding = kernel_size // 2``
    with odd kernels, so the output has the same shape as the input.

    The sub-blocks are stored as ``conv1`` ... ``conv12`` (three per stage,
    in stage order), each wrapped in ``nn.Sequential``, so the parameter
    names in ``state_dict()`` look like ``conv1.0.weight``.
    """

    def __init__(self):
        super().__init__()
        # Modules are created in the order conv1, conv2, ..., conv12 so the
        # random weight initialisation consumes the RNG in that order.
        # Stage 1
        self.conv1 = self._conv_bn_relu(1, 64, kernel_size=11)
        self.conv2 = self._conv_bn_relu(64, 32, kernel_size=1)
        self.conv3 = self._conv_only(32, 1, kernel_size=7)
        # Stage 2
        self.conv4 = self._conv_bn_relu(1, 64, kernel_size=11)
        self.conv5 = self._conv_bn_relu(64, 32, kernel_size=1)
        self.conv6 = self._conv_only(32, 1, kernel_size=7)
        # Stage 3
        self.conv7 = self._conv_bn_relu(1, 64, kernel_size=11)
        self.conv8 = self._conv_bn_relu(64, 32, kernel_size=1)
        self.conv9 = self._conv_only(32, 1, kernel_size=7)
        # Stage 4
        self.conv10 = self._conv_bn_relu(1, 64, kernel_size=11)
        self.conv11 = self._conv_bn_relu(64, 32, kernel_size=1)
        self.conv12 = self._conv_only(32, 1, kernel_size=7)

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels, kernel_size):
        """Build a size-preserving Conv2d -> BatchNorm2d -> ReLU sub-block."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                      padding=kernel_size // 2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    @staticmethod
    def _conv_only(in_channels, out_channels, kernel_size):
        """Build a size-preserving Conv2d wrapped in ``nn.Sequential``."""
        # The Sequential wrapper keeps the "<name>.0.weight" parameter naming.
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                      padding=kernel_size // 2)
        )

    def forward(self, x):
        """Run the four residual stages on ``x``.

        Args:
            x: Input tensor with one channel (the first convolution of every
                stage is ``Conv2d(1, ...)``).

        Returns:
            Tensor of the same shape as ``x``.
        """
        stages = (
            (self.conv1, self.conv2, self.conv3),
            (self.conv4, self.conv5, self.conv6),
            (self.conv7, self.conv8, self.conv9),
            (self.conv10, self.conv11, self.conv12),
        )
        out = x
        for expand, squeeze, project in stages:
            # Skip connection: stage output plus the unmodified stage input.
            out = project(squeeze(expand(out))) + out
        return out
# Timing harness: runs training steps on random data on the GPU and prints
# the wall-clock duration of every iteration.
model = ResidualLearningNet().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.000001)
criterion = nn.MSELoss()
n_epochs = 6
print_every = 1
# `np.Inf` was removed in NumPy 2.0; `np.inf` works on every NumPy version.
val_loss_min = np.inf
for epoch in range(n_epochs):
    train_loss = 0
    model.train()
    for iteration in range(10):
        print('Iteration:', iteration, 'started')
        # Drain queued GPU work so the timer brackets only this iteration.
        torch.cuda.synchronize()
        start_iteration = datetime.now()
        optimizer.zero_grad()
        # Random input batch and random regression target of matching shape.
        Xstage1 = torch.randn(128, 1, 64, 64, device='cuda')
        output = model(Xstage1)
        X_batch = torch.rand_like(output)
        loss = criterion(output, X_batch)
        # Every forward pass builds a new graph and it is backpropagated
        # exactly once, so the graph does not need to be retained.
        loss.backward()
        optimizer.step()
        # Mean loss scaled by batch size, accumulated per epoch.
        train_loss += loss.item() * X_batch.shape[0]
        # Wait for the GPU to finish before reading the end timestamp.
        torch.cuda.synchronize()
        end_iteration = datetime.now()
        print('Iteration stopped.')
        print('Iteration time:', end_iteration - start_iteration)
Output on Colab:
Iteration: 0 started
Iteration stopped.
Iteration time: 0:00:00.609628
Iteration: 1 started
Iteration stopped.
Iteration time: 0:00:00.560231
Iteration: 2 started
Iteration stopped.
Iteration time: 0:00:00.559885
Iteration: 3 started
Iteration stopped.
Iteration time: 0:00:00.560044
Iteration: 4 started
Iteration stopped.
Iteration time: 0:00:00.559213
Iteration: 5 started
Iteration stopped.
Iteration time: 0:00:00.560005
Iteration: 6 started
Iteration stopped.
Iteration time: 0:00:00.559196
Iteration: 7 started
Iteration stopped.
Iteration time: 0:00:00.560828
Iteration: 8 started
Iteration stopped.
Iteration time: 0:00:00.559657
Iteration: 9 started
Iteration stopped.
Iteration time: 0:00:00.560312
Iteration: 0 started
Iteration stopped.
Iteration time: 0:00:00.560171
...