I’m still a beginner with AI and neural networks, and while learning I’m trying to work through some examples. I’ve run into a problem that I have no idea how to fix. If any of you could help me, I’d be really grateful.
What am I trying to do?
A simple image classification with 10 types of animals, using PyTorch and a custom Dataset.
My images
Each image has a shape of (3, 200, 200).
I also have about 40 images in each folder (train and test).
What do my data folders look like?
- train
  - cat
  - dog
  - …
  - rat
- test
  - cat
  - dog
  - …
  - rat
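To double-check the raw files, I read one image back with OpenCV (the file name below is made up; mine differ):

import cv2

img = cv2.imread('../data/train/cat/cat_001.jpg')  # hypothetical file name
print(img.shape)  # cv2.imread returns (height, width, channels), e.g. (200, 200, 3)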
My model
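For reference, all the snippets below assume these imports (listed here so the code reads on its own):

import os
import string

import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torchvision.transforms as transforms
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset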
class NetModel(nn.Module):
    def __init__(self):
        super(NetModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
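To make sure I understood the layer sizes, I also ran a quick shape check with a dummy batch; note that the 16 * 5 * 5 in fc1 corresponds to a 32x32 input (as in the CIFAR-10 tutorial this is adapted from), not to my 200x200 images:

# dummy batch just to check shapes (hypothetical sizes, not my real data)
dummy = torch.randn(4, 3, 32, 32)
out = NetModel()(dummy)
print(out.shape)  # torch.Size([4, 10]) with a 32x32 input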
def train_model(model, train_loader, optimizer, criterion, epochs, save_path):
    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    print('Finished Training')
    # PATH = './cifar_net.pth'
    torch.save(model.state_dict(), save_path)
    print('Saved in: {}'.format(save_path))
def test(model, test_loader, classes, saved_path):
    model.load_state_dict(torch.load(saved_path))

    dataiter = iter(test_loader)
    images, labels = dataiter.next()
    print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

    outputs = model(images)
    _, predicted = torch.max(outputs, 1)
    print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

charset = string.ascii_letters + "-' "
trainset = TESNamesDataset('../data/train', charset, 10)
testset = TESNamesDataset('../data/test', charset, 10)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

model = NetModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

path = "../check_point/saved_model.pth"
train_model(model, trainloader, optimizer, criterion, 100, path)
test(model, testloader, animals, path)
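The animals argument passed to test() is just the list of my 10 class/folder names; I build it roughly like this (exact names elided above):

# class names taken from the training folder names (order assumed, hypothetical)
animals = sorted(os.listdir('../data/train'))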
My custom dataset
class TESNamesDataset(Dataset):
    def __init__(self, data_root, charset, length):
        self.data_root = data_root
        self.charset = charset + '\0'
        self.length = length
        self.samples = []
        self.char_codec = LabelEncoder()
        self._init_dataset()

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        name, pixels = self.samples[idx]
        return self.one_hot_sample(name), pixels

    def _init_dataset(self):
        names = set()
        # self.samples = []
        for animal in os.listdir(self.data_root):
            animal_filepath = os.path.join(self.data_root, animal)
            names.add(animal)
            for img_name in os.listdir(animal_filepath):
                img_path = os.path.join(animal_filepath, img_name)
                im = cv2.imread(img_path)
                if len(animal) < self.length:
                    animal += '\0' * (self.length - len(animal))
                else:
                    animal = animal[:self.length - 1] + '\0'
                self.samples.append((animal, im))
        self.char_codec.fit(list(self.charset))

    def to_one_hot(self, codec, values):
        value_idxs = codec.transform(values)
        return torch.eye(len(codec.classes_))[value_idxs]

    def one_hot_sample(self, name):
        t_name = self.to_one_hot(self.char_codec, list(name))
        return t_name
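For what it’s worth, when I print one sample straight from the dataset, I get shapes like this (56 = len(ascii_letters) + len("-' ") + the '\0' padding character):

name_tensor, image = trainset[0]
print(name_tensor.shape)  # torch.Size([10, 56]) - the one-hot encoded folder name
print(type(image))        # numpy.ndarray, as returned by cv2.imread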
The ERROR message
RuntimeError: Expected 4-dimensional input for 4-dimensional weight 6 3 5 5, but got 3-dimensional input of size [4, 10, 56] instead
Can any of you tell me what I’m doing wrong? In case this question is off-topic, just let me know where I can ask it instead.
Thanks