I am using the Google Colab GPU runtime, which provides a Tesla K80.
I tried setting num_workers to 2, 4, 8, and 16, but it didn't speed up the training.
But I tried your way
I ran my model without any data loading, as in your code above: in a separate Colab notebook I randomly initialize a 540×540 image and pass it to the model, to check whether the problem is in the data loading or in the model itself.
But I still get the following forward and backward times:
contracting block time: 2.4187822341918945
bottleneck block time: 0.31655025482177734
expanding block time: 2.5195484161376953
fwd 5.256795644760132s
bwd 10.607162237167358s
Below is the code I have written for your reference
Unet model:
class Unet(nn.Module):
    """U-Net style encoder/decoder built from unpadded 3x3 convolutions.

    Every stage is ``Conv2d -> ReLU -> BatchNorm2d`` applied twice.  The
    encoder halves the resolution four times with max pooling; the decoder
    upsamples with transposed convolutions and concatenates the centre-cropped
    encoder feature maps (skip connections).

    The last convolution uses a fixed padding of 86 pixels.  With that value a
    square input of side ``16 * n + 60`` (``n >= 7``; e.g. 172 or 540) yields
    an output with the same spatial size as the input.

    Args:
        in_channels: Number of channels of the input image.
        out_channels: Number of channels of the produced map.
        base_channels: Width of the first encoder stage; each deeper stage
            doubles it.  The default of 64 gives the 64/128/256/512/1024
            layout.
    """

    def contracting_block(self, in_channels, out_channels, kernel_size=3):
        """Build one encoder stage: two unpadded conv/ReLU/batch-norm steps."""
        return torch.nn.Sequential(
            torch.nn.Conv2d(kernel_size=kernel_size, in_channels=in_channels, out_channels=out_channels),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(out_channels),
            torch.nn.Conv2d(kernel_size=kernel_size, in_channels=out_channels, out_channels=out_channels),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(out_channels),
        )

    def expansive_block(self, in_channels, mid_channels, out_channels, kernel_size=3):
        """Build one decoder stage: two conv steps, then a stride-2 upsampling.

        The transposed convolution (kernel 3, stride 2, no padding) maps a side
        length ``s`` to ``2 * s + 1``; ``forward`` pads one extra row/column so
        the result has an even side length.
        """
        return torch.nn.Sequential(
            torch.nn.Conv2d(kernel_size=kernel_size, in_channels=in_channels, out_channels=mid_channels),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(mid_channels),
            torch.nn.Conv2d(kernel_size=kernel_size, in_channels=mid_channels, out_channels=mid_channels),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(mid_channels),
            torch.nn.ConvTranspose2d(
                in_channels=mid_channels,
                out_channels=out_channels,
                kernel_size=3,
                stride=2,
                padding=0,
                output_padding=0,
                dilation=1,
            ),
        )

    def final_block(self, in_channels, mid_channels, out_channels, kernel_size=3):
        """Build the output stage: two conv steps and a heavily padded conv.

        The padding of 86 on the last convolution grows the small decoder map
        back up (see the class docstring for the input sizes it matches).
        """
        return torch.nn.Sequential(
            torch.nn.Conv2d(kernel_size=kernel_size, in_channels=in_channels, out_channels=mid_channels),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(mid_channels),
            torch.nn.Conv2d(kernel_size=kernel_size, in_channels=mid_channels, out_channels=mid_channels),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(mid_channels),
            torch.nn.Conv2d(
                kernel_size=kernel_size,
                in_channels=mid_channels,
                out_channels=out_channels,
                padding=(86, 86),
            ),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(out_channels),
        )

    def __init__(self, in_channels, out_channels, base_channels=64):
        super().__init__()
        # Channel widths of the four encoder stages and of the bottleneck.
        c1 = base_channels
        c2, c3, c4, c5 = 2 * c1, 4 * c1, 8 * c1, 16 * c1

        # 1x1 convolution applied to the raw input before the encoder.
        self.layer1 = nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=(1, 1))

        # Encode
        self.conv_encode1 = self.contracting_block(in_channels=in_channels, out_channels=c1)
        self.conv_maxpool1 = torch.nn.MaxPool2d(kernel_size=2)
        self.conv_encode2 = self.contracting_block(in_channels=c1, out_channels=c2)
        self.conv_maxpool2 = torch.nn.MaxPool2d(kernel_size=2)
        self.conv_encode3 = self.contracting_block(in_channels=c2, out_channels=c3)
        self.conv_maxpool3 = torch.nn.MaxPool2d(kernel_size=2)
        self.conv_encode4 = self.contracting_block(in_channels=c3, out_channels=c4)
        self.conv_maxpool4 = torch.nn.MaxPool2d(kernel_size=2)

        # Bottleneck
        self.bottleneck = torch.nn.Sequential(
            torch.nn.Conv2d(kernel_size=3, in_channels=c4, out_channels=c5),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(c5),
            torch.nn.Conv2d(kernel_size=3, in_channels=c5, out_channels=c5),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(c5),
            torch.nn.ConvTranspose2d(
                in_channels=c5,
                out_channels=c4,
                kernel_size=2,
                stride=2,
                padding=0,
                output_padding=0,
            ),
        )

        # Decode: each stage receives the upsampled map concatenated with the
        # matching encoder map, hence twice the channels it outputs upstream.
        self.conv_decode4 = self.expansive_block(c5, c4, c3)
        self.conv_decode3 = self.expansive_block(c4, c3, c2)
        self.conv_decode2 = self.expansive_block(c3, c2, c1)
        self.final_layer = self.final_block(c2, c1, out_channels)

        # Adds one zero row on top and one zero column on the left.
        self.pad1 = nn.ConstantPad2d(padding=(1, 0, 1, 0), value=0)

    def crop_and_concat(self, upsampled, bypass, crop=False):
        """Concatenate ``upsampled`` and ``bypass`` along the channel axis.

        With ``crop=True`` the skip tensor ``bypass`` is first centre-cropped
        to the spatial size of ``upsampled``.  Height and width are handled
        independently, and an odd size difference removes the extra
        row/column from the bottom/right side.
        """
        if crop:
            diff_h = bypass.size(2) - upsampled.size(2)
            diff_w = bypass.size(3) - upsampled.size(3)
            top, left = diff_h // 2, diff_w // 2
            bottom, right = diff_h - top, diff_w - left
            # Negative padding crops; F.pad takes (left, right, top, bottom).
            bypass = F.pad(bypass, (-left, -right, -top, -bottom))
        return torch.cat((upsampled, bypass), 1)

    @staticmethod
    def _timestamp(tensor):
        """Return ``time.time()`` once work queued for ``tensor`` is done.

        CUDA kernels are launched asynchronously, so without a synchronize the
        clock would only measure how long it took to enqueue them.
        """
        if tensor.is_cuda:
            torch.cuda.synchronize(tensor.device)
        return time.time()

    def forward(self, x):
        """Run the network on ``x`` and print the wall time of each stage."""
        started = self._timestamp(x)

        # Encode
        pad_x = self.layer1(x)
        encode_block1 = self.conv_encode1(pad_x)
        encode_pool1 = self.conv_maxpool1(encode_block1)
        encode_block2 = self.conv_encode2(encode_pool1)
        encode_pool2 = self.conv_maxpool2(encode_block2)
        encode_block3 = self.conv_encode3(encode_pool2)
        encode_pool3 = self.conv_maxpool3(encode_block3)
        encode_block4 = self.conv_encode4(encode_pool3)
        encode_pool4 = self.conv_maxpool4(encode_block4)
        encoded = self._timestamp(encode_pool4)
        print("contracting block time: ", encoded - started)

        # Bottleneck
        bottleneck1 = self.bottleneck(encode_pool4)
        squeezed = self._timestamp(bottleneck1)
        print("bottleneck block time: ", squeezed - encoded)

        # Decode
        decode_block4 = self.crop_and_concat(bottleneck1, encode_block4, crop=True)
        cat_layer3 = self.conv_decode4(decode_block4)
        cat_layer3 = self.pad1(cat_layer3)
        decode_block3 = self.crop_and_concat(cat_layer3, encode_block3, crop=True)
        cat_layer2 = self.conv_decode3(decode_block3)
        cat_layer2 = self.pad1(cat_layer2)
        decode_block2 = self.crop_and_concat(cat_layer2, encode_block2, crop=True)
        cat_layer1 = self.conv_decode2(decode_block2)
        pad1_cat_layer1 = self.pad1(cat_layer1)
        decode_block1 = self.crop_and_concat(pad1_cat_layer1, encode_block1, crop=True)
        final_layer = self.final_layer(decode_block1)
        decoded = self._timestamp(final_layer)
        print("expanding block time: ", decoded - squeezed)
        return final_layer
# Experiment settings: batch size of 1 for both training and testing,
# 50 epochs, a learning rate of 0.1, and square 540x540 images.
batch_size_train = batch_sizetest = 1
num_epochs, learning_rate = 50, 0.1
img_width = img_height = 540
Training code: Same as yours
# Time one forward and one backward pass on a random 540x540 one-channel image.
# Model and input are placed on the GPU when one is available; otherwise the
# whole measurement runs on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
x = torch.randn(1, 1, 540, 540, device=device)


def _wall_clock():
    """Return time.time() after any pending CUDA work has completed."""
    # CUDA kernels run asynchronously; without this wait the clock would stop
    # before the GPU has actually finished.
    if device.type == 'cuda':
        torch.cuda.synchronize()
    return time.time()


# NOTE: the first pass on a GPU may include one-time start-up cost, so repeat
# the measurement before drawing conclusions.
t0 = _wall_clock()
output = model(x)
t1 = _wall_clock()
print('fwd {}s'.format(t1 - t0))

loss = output.mean()
t0 = _wall_clock()
loss.backward()
t1 = _wall_clock()
print('bwd {}s'.format(t1 - t0))
Please help — I just can't figure out why this is so slow. Any ideas on how to approach this problem would be appreciated.