Hello, there. I am trying to use this CNN code for img classifications. I am using 480x640 (height x length) images as input, also with its respectives 3 color channels. I adapted this code from a really famous online tutorial (shoutout to sentdex!) that was meant to process grayscale images, and it worked for me, although the accuracy was really bad, but i expected that since my classifications rely HARDLY on colors.
The thing is, i got stuck on this this error, and i can’t seem to overcome this, since i am a COMPLETE newbie into this programming world, self taught and blablabla. So, take easy on me
So, there is the code:
import os
import cv2
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
REBUILD_DATA = False # set to true to one once, then back to false unless you want to change something in your training data.
if torch.cuda.is_available():
device = torch.device("cuda:0") # you can continue going on here, like cuda:1 cuda:2....etc.
print("Running on the GPU")
else:
device = torch.device("cpu")
print("Running on the CPU")
class CornSeeds():
COB = "C:/Users/leo_f/OneDrive/Imagens/MV/Cob"
BROKEN = "C:/Users/leo_f/OneDrive/Imagens/MV/Broken"
ROTTEN = "C:/Users/leo_f/OneDrive/Imagens/MV/Rotten"
GOOD = "C:/Users/leo_f/OneDrive/Imagens/MV/Good"
TESTING = "C:/Users/leo_f/OneDrive/Imagens/MV/Testing"
LABELS = {COB: 0, BROKEN: 1, ROTTEN: 2, GOOD: 3}
training_data = []
cobcount = 0
brokencount = 0
rottencount = 0
goodcount = 0
def make_training_data(self):
for label in self.LABELS:
print(label)
for f in tqdm(os.listdir(label)):
if "bmp" in f:
try:
path = os.path.join(label, f)
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
self.training_data.append([np.array(img), np.eye(4)[self.LABELS[label]]]) # do something like print(np.eye(2)[1]), just makes one_hot
#print(np.eye(2)[self.LABELS[label]])
if label == self.COB:
self.cobcount += 1
elif label == self.BROKEN:
self.brokencount += 1
elif label == self.ROTTEN:
self.rottencount += 1
elif label == self.GOOD:
self.goodcount += 1
except Exception as e:
pass
#print(label, f, str(e))
np.random.shuffle(self.training_data)
np.save("training_data.npy", self.training_data)
print('Cob:',cornseeds.cobcount)
print('Broken:',cornseeds.brokencount)
print('Rotten:',cornseeds.rottencount)
print('Good:',cornseeds.goodcount)
if REBUILD_DATA:
cornseeds = CornSeeds()
cornseeds.make_training_data()
training_data = np.load("training_data.npy", allow_pickle=True)
print(len(training_data))
class Net(nn.Module):
def __init__(self):
super().__init__() # just run the init of parent class (nn.Module)
self.conv1 = nn.Conv2d(3, 128, 5) # input is 3 channels of an image, 128 output channels, 5x5 kernel / window
self.conv2 = nn.Conv2d(128, 256, 5) # input is 128, bc the first layer output 128. Then we say the output will be 256 channels, 5x5 kernel / window
self.conv3 = nn.Conv2d(256, 512, 5)
x = torch.randn(480, 640).view(-1, 3, 480, 640) # batch, channel, height, width
self._to_linear = None
self.convs(x)
self.fc1 = nn.Linear(self._to_linear, 512) #flattening.
self.fc2 = nn.Linear(512, 4) # 512 in, 4 out bc we're doing 4 classes.
def convs(self, x):
# max pooling over 2x2
x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
if self._to_linear is None:
self._to_linear = np.prod(x[0].shape)
return x
def forward(self, x):
x = self.convs(x)
x = x.view(-1, self._to_linear) # .view is reshape ... this flattens X before
x = F.relu(self.fc1(x))
x = self.fc2(x) # bc this is our output layer. No activation here.
return F.softmax(x, dim=1)
net = Net().to(device)
print(net)
if REBUILD_DATA:
cornseeds = CornSeeds()
cornseeds.make_training_data()
training_data = np.load("training_data.npy", allow_pickle=True)
print(len(training_data))
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.CrossEntropyLoss()
X = torch.Tensor([i[0] for i in training_data]).view(-1, 3, 480, 640)
X = X/255.0
y = torch.Tensor([i[1] for i in training_data])
VAL_PCT = 0.1 # lets reserve 10% of our data for validation
val_size = int(len(X)*VAL_PCT)
train_X = X[:-val_size]
train_X.unsqueeze_(0)
train_y = y[:-val_size]
test_X = X[-val_size:]
test_X.unsqueeze_(0)
test_y = y[-val_size:]
def train(net):
optimizer = optim.Adam(net.parameters(), lr=0.001)
BATCH_SIZE = 4
EPOCHS = 1
for epoch in range(EPOCHS):
for i in range(0, len(train_X), BATCH_SIZE): # from 0, to the len of x, stepping BATCH_SIZE at a time. [:50] ..for now just to dev
#print(f"{i}:{i+BATCH_SIZE}")
batch_X = train_X[i:i+BATCH_SIZE].view(-1, 3, 480, 640)
batch_y = train_y[i:i+BATCH_SIZE]
batch_X, batch_y = batch_X.to(device), batch_y.to(device)
net.zero_grad()
optimizer.zero_grad() # zero the gradient buffers
outputs = net(batch_X)
loss = loss_function(outputs, batch_y)
loss.backward()
optimizer.step() # Does the update
print(f"Epoch: {epoch}. Loss: {loss}")
def test(net):
correct = 0
total = 0
with torch.no_grad():
for i in tqdm(range(len(test_X))):
real_class = torch.argmax(test_y[i]).to(device)
net_out = net(test_X[i].view(-1, 1, 480, 640).to(device))[0] # returns a list,
predicted_class = torch.argmax(net_out)
if predicted_class == real_class:
correct += 1
total += 1
print("Accuracy: ", round(correct/total, 3))
train(net)
test(net)
And this is the error i am getting:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-46-1894121d00fb> in <module>
103
104
--> 105 net = Net().to(device)
106 print(net)
107
<ipython-input-46-1894121d00fb> in __init__(self)
78 self.conv3 = nn.Conv2d(256, 512, 5)
79
---> 80 x = torch.randn(480, 640).view(-1, 3, 480, 640) # batch, channel, height, width
81 self._to_linear = None
82 self.convs(x)
RuntimeError: shape '[-1, 3, 480, 640]' is invalid for input of size 307200
Help me