Thank you, but it still doesn’t work. I still think that are is some problem with training process. Maybe you have some other ideas?
- You can find my full code below. I use cifar10 data-set to test. Result is:
Files already downloaded and verified
Files already downloaded and verified
-------Your example:
tensor([[[-0.0345, -0.0376, -0.0394, -0.0299, -0.0289, -0.0306, -0.0450,
-0.0433, -0.0467],
[-0.0135, -0.0142, -0.0209, -0.0043, -0.0014, -0.0082, -0.0083,
-0.0053, -0.0138],
[ 0.0018, -0.0113, -0.0043, -0.0084, -0.0320, -0.0282, -0.0141,
-0.0362, -0.0300]]], device='cuda:0')
-------Compare my Conv with nn.Conv2d
tensor(-1.2677e+31, device='cuda:0', grad_fn=<SumBackward0>)
------- Training:
~10% accuracy , example:
22/ 1563 --- Loss: 0000nan | Acc: 09.5109 (00070/00736)
I see that loss is 0. Of course I run this code longer then 22 batches.
- If I change my layer MyConv2d to nn.Conv2d then accuracy increase to normal value ~60%. With my layer accuracy is about ~10%.
#self.conv1 = nn.Conv2d(3, 64, 3, bias=False)
#self.conv2 = nn.Conv2d(64, 32, 3, bias=False)
self.conv1 = MyConv2d(3, 64, 3)
self.conv2 = MyConv2d(64, 32, 3)
- If I add that
print
:
def forward(self, x):
print(self.weights[0][0][0:5].cpu().detach())
then I can see that weights doesn’t change(I tested others weights too) - two tensor because two convolutional layer.
tensor([-4.9320e-21, 4.5769e-41, -4.9320e-21, 4.5769e-41, -3.8139e-18])
tensor([-4.9321e-21, 4.5769e-41, 3.8115e-35, 0.0000e+00, -3.7481e-18])
...after 1 epoch weights are the same...
tensor([-4.9320e-21, 4.5769e-41, -4.9320e-21, 4.5769e-41, -3.8139e-18])
tensor([-4.9321e-21, 4.5769e-41, 3.8115e-35, 0.0000e+00, -3.7481e-18])
from sys import stdout
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import math
##############
device = torch.device("cuda:0")
epochs = 10
batch_size = 32
##############
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)
##############
class MyConv2d(nn.Module):
def __init__(self, n_channels, out_channels, kernel_size, dilation=1, padding=0, stride=1):
super(MyConv2d, self).__init__()
self.kernel_size = (kernel_size, kernel_size)
self.kernal_size_number = kernel_size * kernel_size
self.out_channels = out_channels
self.dilation = (dilation, dilation)
self.padding = (padding, padding)
self.stride = (stride, stride)
self.n_channels = n_channels
self.weights = nn.Parameter(torch.Tensor(self.out_channels, self.n_channels, self.kernal_size_number))
def forward(self, x):
width = self.calculateNewWidth(x)
height = self.calculateNewHeight(x)
windows = self.calculateWindows(x)
result = torch.zeros(
[x.shape[0] * self.out_channels, width, height], dtype=torch.float32, device=device
)
for channel in range(x.shape[1]):
for i_convNumber in range(self.out_channels):
xx = torch.matmul(windows[channel], self.weights[i_convNumber][channel])
xx = xx.view(-1, width, height)
result[i_convNumber * xx.shape[0] : (i_convNumber + 1) * xx.shape[0]] += xx
result = result.view(x.shape[0], self.out_channels, width, height)
return result
def calculateWindows(self, x):
windows = F.unfold(
x, kernel_size=self.kernel_size, padding=self.padding, dilation=self.dilation, stride=self.stride
)
windows = windows.transpose(1, 2).contiguous().view(-1, x.shape[1], self.kernal_size_number)
windows = windows.transpose(0, 1)
return windows
def calculateNewWidth(self, x):
return (
(x.shape[2] + 2 * self.padding[0] - self.dilation[0] * (self.kernel_size[0] - 1) - 1)
// self.stride[0]
) + 1
def calculateNewHeight(self, x):
return (
(x.shape[3] + 2 * self.padding[1] - self.dilation[1] * (self.kernel_size[1] - 1) - 1)
// self.stride[1]
) + 1
def get_weights(self):
kernal_size = int(math.sqrt(self.kernal_size_number))
return nn.Parameter(self.weights.view(self.out_channels, self.n_channels, kernal_size, kernal_size))
##############
print("-------")
conv = MyConv2d(3, 1, 3).cuda()
x = torch.randn(1, 3, 24, 24).cuda()
out = conv(x)
out.mean().backward()
print(conv.weights.grad)
print("-------")
##############
class TestModel(nn.Module):
def __init__(self):
super(TestModel, self).__init__()
self.conv1 = MyConv2d(3, 64, 3)
self.conv2 = torch.nn.Conv2d(3, 64, 3, bias=False)
self.conv2.weight = self.conv1.get_weights()
def forward(self, x):
y1 = self.conv1(x)
y2 = self.conv2(x)
return [y1,y2]
model = TestModel().to(device)
x, _ = [ x[0:32] for x in iter(trainloader).next() ]
x = x.to(device)
result = model(x)
print(torch.sum(result[1]-result[0]))
print("-------")
##############
class CnnModel(nn.Module):
def __init__(self):
super(CnnModel, self).__init__()
#self.conv1 = nn.Conv2d(3, 64, 3, bias=False)
#self.conv2 = nn.Conv2d(64, 32, 3, bias=False)
self.conv1 = MyConv2d(3, 64, 3)
self.conv2 = MyConv2d(64, 32, 3)
self.pool = nn.MaxPool2d(2, 2)
self.fc1 = nn.Linear(32 * 6 * 6, 128)
self.fc2 = nn.Linear(128, 64)
self.fc3 = nn.Linear(64, 10)
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.pool(x)
x = self.conv2(x)
x = F.relu(x)
x = self.pool(x)
x = x.view(-1, 32 * 6 * 6)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
##############
model = CnnModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
model.train()
for epoch in range(epochs):
train_loss = 0.0
correct = 0
total = 0
for batch_idx, (inputs, targets) in enumerate(trainloader, 0):
inputs, targets = inputs.to(device), targets.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
train_loss += loss.item()
_, predicted = outputs.max(1)
total += targets.size(0)
correct += predicted.eq(targets).sum().item()
stdout.write("\r{:5d}/{:5d} --- Loss: {:07.5f} | Acc: {:07.4f} \t\t\t({:05d}/{:05d})".format(
batch_idx,
len(trainloader),
train_loss / (batch_idx + 1),
100.0 * correct / total,
correct,
total,
)
)
stdout.flush()
stdout.write("\n")```