Hello,

I am getting the following error, which seems to be related to my model definition.

```
RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)
```

```
class Net(nn.Module):
    """Seven-layer convolutional network followed by two fully connected layers.

    The layer sizes are chosen for single-channel 512x512 inputs, i.e. a
    tensor of shape ``(N, 1, 512, 512)``. ``conv1`` and ``conv2`` keep the
    spatial size (stride 1, padding 3, kernel 7); each of ``conv3`` ..
    ``conv7`` halves it (stride 2), so the feature map entering the
    classifier is ``(N, 32, 16, 16)``. The output has shape ``(N, 48)``.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, kernel_size=7, padding=3)
        self.conv2 = nn.Conv2d(1, 8, kernel_size=7, padding=3)
        self.conv3 = nn.Conv2d(8, 8, kernel_size=7, padding=3, stride=2)
        self.conv4 = nn.Conv2d(8, 8, kernel_size=7, padding=3, stride=2)
        self.conv5 = nn.Conv2d(8, 16, kernel_size=7, padding=3, stride=2)
        self.conv6 = nn.Conv2d(16, 32, kernel_size=7, padding=3, stride=2)
        self.conv7 = nn.Conv2d(32, 32, kernel_size=7, padding=3, stride=2)
        # 512 -> 512 -> 512 -> 256 -> 128 -> 64 -> 32 -> 16 per spatial side,
        # with 32 channels out of conv7: 32 * 16 * 16 flattened features.
        self.fc1 = nn.Linear(32 * 16 * 16, 256)
        self.fc2 = nn.Linear(256, 48)

    def forward(self, x):
        """Run the network on ``x`` and return the ``(N, 48)`` output.

        Args:
            x: input tensor of shape ``(N, 1, 512, 512)``.

        Returns:
            Tensor of shape ``(N, 48)``.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        print('size', x.size())
        # nn.Linear only acts on the last dimension. Without flattening, fc1
        # sees 16 features instead of 32*16*16, which is what triggers the
        # shape mismatch (reported as CUBLAS_STATUS_INVALID_VALUE on CUDA).
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x
# Prefer the GPU when CUDA is available, otherwise run on the CPU
# (torch.device API, PyTorch v0.4.0).
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the network, move its parameters to the chosen device, then print a
# per-layer summary for a single-channel 512x512 input.
model = Net()
model = model.to(device)
summary(model, (1, 512, 512))
```

It seems that my first fully connected layer (`fc1`) is incorrect.

Does anyone know why?