Hi guys, I am trying to create a ResNet-based GAN architecture for images of 224x224 pixels.
I've found plenty of resources online on creating a ResNet classifier, so this is what my discriminator looks like:
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm stages wrapped by a shortcut connection.

    The first convolution applies ``stride``; the second always uses
    stride 1. When the shortcut input and the main-path output would
    differ in shape (``stride != 1`` or a channel-count change), the
    shortcut is a 1x1 convolution followed by batch norm; otherwise it is
    an empty ``nn.Sequential`` that passes the input through unchanged.
    """

    # Multiplier applied to ``outchannel`` to get the shortcut's width.
    expansion = 1

    def __init__(self, inchannel, outchannel, stride=1):
        super().__init__()
        shortcut_channels = self.expansion * outchannel

        # Main path, stage 1: this is where any spatial stride is applied.
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                inchannel,
                outchannel,
                kernel_size=3,
                stride=stride,
                padding=1,
                bias=False,
            ),
            nn.BatchNorm2d(outchannel),
        )
        # Main path, stage 2: shape-preserving 3x3 convolution.
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                outchannel,
                outchannel,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
            ),
            nn.BatchNorm2d(outchannel),
        )

        if stride != 1 or inchannel != shortcut_channels:
            # Project the input so it can be added to the main-path output.
            self.skip = nn.Sequential(
                nn.Conv2d(
                    inchannel,
                    shortcut_channels,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(shortcut_channels),
            )
        else:
            # Shapes already agree: an empty Sequential returns its input.
            self.skip = nn.Sequential()

    def forward(self, X):
        """Return ``relu(conv2(relu(conv1(X))) + skip(X))``."""
        shortcut = self.skip(X)
        main = self.conv2(F.relu(self.conv1(X)))
        return F.relu(main + shortcut)
class Discriminator(nn.Module):
    """ResNet-style network: a conv stem, four residual stages, and a linear head.

    Args:
        ResidualBlock: Block class (or factory) called as
            ``block(in_channels, out_channels, stride)``. It must expose an
            integer ``expansion`` attribute giving the ratio between the
            block's output channels and ``out_channels``.
        num_classes: Number of output features of the final linear layer.
            NOTE(review): for a GAN real/fake critic this is presumably 1 —
            confirm against the training loop.
    """

    def __init__(self, ResidualBlock, num_classes):
        super().__init__()
        # Running channel count consumed and updated by ``make_layer``.
        self.inchannel = 64
        # Stem: expects 3-channel input; kernel 3 / stride 1 / padding 1
        # keeps the spatial size unchanged.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
        )
        # Four stages of two blocks each; stages 2-4 pass stride=2 to
        # their first block.
        self.layer1 = self.make_layer(ResidualBlock, 64, 2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)
        self.fc = nn.Linear(512 * ResidualBlock.expansion, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        """Build one stage of ``num_blocks`` blocks as an ``nn.Sequential``.

        Only the first block receives ``stride``; the remaining blocks use
        stride 1. ``self.inchannel`` is updated after every block so the
        next block (and the next stage) sees the correct input width.
        """
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.inchannel, channels, block_stride))
            self.inchannel = channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the ``(batch, num_classes)`` output of the linear head."""
        out = F.relu(self.conv1(x))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pool to 1x1 regardless of the feature map's aspect
        # ratio. Using the width as a square kernel size only collapses
        # square maps; non-square inputs then break the linear head.
        out = F.adaptive_avg_pool2d(out, 1)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out
Now, this discriminator has 4 layers with two residual blocks each, and I would like to create a generator with more or less the same depth. The problem is that I am extremely confused about how I should create this network. I wish to start from a noise dimension of 100 and reach an image dimension of 224x224x3. I don't understand how I should include the upsampling in my basic block; if someone could give me some input on what one of these generator layers should look like, I would really appreciate it.