I have written a network that tries to classify images.
I have tested ResNet and got good results, so the training loop works, but when I wanted to write my own net it returns NaN after a few steps, because the outputs get too high.
I wanted to keep my outputs low by using different output functions but always got the
“one of the variables needed for gradient computation has been modified by an inplace operation” Error
Is this even the right approach, or should I try something else?
class simple(nn.Module):
    """ResNet-style image classifier: a conv stem, four residual macro-blocks,
    global average pooling, and a 9-way linear head.

    Input is expected to be (batch, 3, H, W) — the inline comments in the
    original suggested 3x200x200, but AdaptiveAvgPool2d makes any spatial size
    that survives the four halvings work.  Output is (batch, 9) raw logits.

    NOTE(review): a single `conv128`/`bn128` pair is reused for *every* layer,
    so all depths share one weight set and one running BatchNorm statistic.
    That is unusual and may well contribute to the reported activation
    blow-up — confirm whether weight sharing is intended; a standard ResNet
    gives each layer its own conv + bn modules.
    """

    def __init__(self):
        super(simple, self).__init__()
        # Stem: 7x7/stride-2 conv, then channel expansion to 128.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.conv_64to128 = nn.Conv2d(64, 128, 3, padding=1)
        # Shared body conv/bn (see NOTE in class docstring).
        self.conv128 = nn.Conv2d(128, 128, 3, padding=1)
        self.bn128 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # fixed 1x1 output size
        # Fully connected classification head.
        self.fc = nn.Linear(128, 9)

    def _conv_bn_relu(self, x):
        """One conv -> batchnorm -> ReLU step with the shared 128-ch layers."""
        return F.relu(self.bn128(self.conv128(x)))

    def _res_unit(self, x):
        """Four conv-bn-relu layers plus an identity skip connection.

        The skip is added OUT-OF-PLACE (`x + ide`, never `x += ide`): in-place
        addition mutates a tensor that autograd may still need for the
        backward pass of the preceding op, which is what raises
        "one of the variables needed for gradient computation has been
        modified by an inplace operation".  Because nothing mutates tensors
        in place anymore, no `.clone()` of the identity is needed.
        """
        ide = x
        for _ in range(4):
            x = self._conv_bn_relu(x)
        return F.relu(x + ide)

    def _macro_block(self, x):
        """Four residual units wrapped in an outer (long-range) skip."""
        ids = x
        for _ in range(4):
            x = self._res_unit(x)
        return F.relu(x + ids)

    def forward(self, x):  # (B, 3, H, W)
        """Compute 9-class logits for a batch of images."""
        x = F.relu(self.conv1(x))                    # (B, 64, H/2, W/2)
        x = self.pool(F.relu(self.conv_64to128(x)))  # (B, 128, H/4, W/4)
        # Two more conv+pool steps: spatial size H/16 x W/16.
        for _ in range(2):
            x = self.pool(F.relu(self.conv128(x)))
        # One macro-block, then three more — four in total.
        x = self._macro_block(x)
        for _ in range(3):
            x = self._macro_block(x)
        x = self.avgpool(x)                          # (B, 128, 1, 1)
        x = x.reshape(x.shape[0], -1)                # (B, 128)
        return self.fc(x)                            # (B, 9) logits