This is going to be a long question. This below is my neural net.
class CarBrand(nn.Module):
def __init__(self, num_classes):
super(CarBrand, self).__init__()
# 'in_channels': color channels in images, since images in dataset are greyscaled, in_channels start from 1
self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
self.relu = nn.ReLU()
self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
# 128: comes from "conv3", 124: image size is 124x124, 1: only greyscaled, not RGB
self.fc1 = nn.Linear(128 * (image_height // 8) * (image_width // 8), 256) # Adjust the input size
#self.fc2_class = nn.Linear(512, num_classes)
self.fc2_class = nn.Linear(256, num_classes)
def forward(self, x):
x = self.relu(self.conv1(x))
x = self.maxpool(x)
x = self.relu(self.conv2(x))
x = self.maxpool(x)
x = self.relu(self.conv3(x))
x = self.maxpool(x)
x = x.view(x.size(0), -1)
x = self.relu(self.fc1(x))
brand_pred = self.fc2_class(x)
return brand_pred
And that is my Dataset class
class CarBrandDataset(Dataset):
def __init__(self, data_dir, transform=None):
self.data_dir = data_dir
self.transform = transform
self.image_paths = [filename for filename in os.listdir(data_dir) if filename.endswith('.jpg')]
def __len__(self):
return len(self.image_paths)
def __getitem__(self, index):
image_path = os.path.join(self.data_dir, self.image_paths[index])
xml_path = os.path.splitext(image_path)[0] + '.xml'
image = Image.open(image_path).convert('L') # convert to greyscale
if self.transform:
data = (2, 0.5, -10, -0.5, 1.5, 20)
image = image.transform(image.size, Image.AFFINE, data)
# parse XML
tree = ET.parse(xml_path)
root = tree.getroot()
# extract label information from XML
brand_label_mapping = {
'bmw': 0,
'citroen': 1,
'mercedes': 2,
'maserati': 3,
'audi': 4,
'honda': 5
}
brand_label_str = root.text
brand_label = brand_label_mapping.get(brand_label_str, -1)
image_np = np.asarray(image, dtype=np.float32)
return image_np, brand_label
transform = transforms.Compose([
transforms.Grayscale(num_output_channels=1),
transforms.ToTensor()
])
data_dir = "splitted_data/train"
dataset = CarBrandDataset(data_dir, transform=transform)
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
car_brands: int = 6
num_epochs: int = 100
and finally this is my training loop:
model = CarBrand(num_classes=car_brands)
criterion_class = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
for epoch in range(num_epochs):
running_loss = 0.0
for images, brand_labels in dataloader:
optimizer.zero_grad()
brand_pred = model(images)
loss_brand = criterion_class(brand_pred, brand_labels)
loss_brand.backward()
optimizer.step()
running_loss += loss_brand.item()
print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {running_loss / len(dataloader)}")
But I get errors,
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[38], line 12
9 for images, brand_labels in dataloader:
10 optimizer.zero_grad()
---> 12 brand_pred = model(images)
14 loss_brand = criterion_class(brand_pred, brand_labels)
16 loss_brand.backward()
File ~\OneDrive\Masaüstü\plate\pytorchNN\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
Cell In[33], line 20, in CarBrand.forward(self, x)
18 def forward(self, x):
---> 20 x = self.relu(self.conv1(x))
21 x = self.maxpool(x)
23 x = self.relu(self.conv2(x))
File ~\OneDrive\Masaüstü\plate\pytorchNN\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File ~\OneDrive\Masaüstü\plate\pytorchNN\Lib\site-packages\torch\nn\modules\conv.py:463, in Conv2d.forward(self, input)
462 def forward(self, input: Tensor) -> Tensor:
--> 463 return self._conv_forward(input, self.weight, self.bias)
File ~\OneDrive\Masaüstü\plate\pytorchNN\Lib\site-packages\torch\nn\modules\conv.py:459, in Conv2d._conv_forward(self, input, weight, bias)
455 if self.padding_mode != 'zeros':
456 return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
457 weight, bias, self.stride,
458 _pair(0), self.dilation, self.groups)
--> 459 return F.conv2d(input, weight, bias, self.stride,
460 self.padding, self.dilation, self.groups)
RuntimeError: Given groups=1, weight of size [32, 1, 3, 3], expected input[1, 32, 300, 300] to have 1 channels, but got 32 channels instead```
I searched Stackoverflow, I asked ChatGPT,Bard,BingAI.
I am really about to lose my mind, I just want to finish it and get some peaceful sleep, Please solve this annoying problem.Sorry it's too long, it's my whole code.