I’m doing hand gesture recognition. This network in Keras performs well, reaching 99% accuracy:
# Keras reference model: three conv + 2x2 max-pool stages on single-channel
# HEIGHT x WIDTH images, then a 128-unit hidden layer and a 10-way softmax.
model = Sequential([
    Conv2D(filters=32, kernel_size=(5, 5), activation='relu',
           input_shape=(HEIGHT, WIDTH, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(units=128, activation='relu'),
    Dense(units=10, activation='softmax'),
])

# The sparse loss takes integer class labels; 'SGD' selects the optimizer
# with its default settings.
model.compile(
    optimizer='SGD',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# The test split is also passed as the per-epoch validation set.
model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    verbose=1,
    validation_data=(x_test, y_test),
)

test_loss, test_acc = model.evaluate(x_test, y_test)
accuracy_percent = test_acc * 100
print("Test accuracy: %.2f" % accuracy_percent)
Here’s what I’m doing with PyTorch:
class Network(nn.Module):
    """Small CNN producing 10 class logits from single-channel images.

    Layout: three convolution + ReLU + 2x2 max-pool stages (32, 64 and 64
    output channels), followed by a 128-unit hidden layer and a 10-unit
    output layer. ``forward`` returns raw logits (no softmax is applied), so
    it is meant to be paired with a loss that applies log-softmax itself,
    such as ``nn.CrossEntropyLoss``.

    The first fully connected layer expects 3584 features per sample, i.e.
    64 channels times 56 spatial positions after the third pooling stage;
    the input height and width must be chosen accordingly.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        # NOTE: this dropout layer is created but never applied in forward().
        self.conv2_drop = nn.Dropout2d(p=.5)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3)
        self.fc1 = nn.Linear(3584, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Compute a forward pass.

        Args:
            x: input batch laid out as (batch, 1, height, width).

        Returns:
            Tensor of shape (batch, 10) holding unnormalised class scores.

        Raises:
            RuntimeError: if the flattened per-sample feature count does not
                match the 3584 inputs expected by ``fc1``.
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = F.max_pool2d(F.relu(self.conv3(x)), 2)
        # Flatten per sample instead of view(-1, 3584): the latter silently
        # merges or splits batch rows when the spatial size is unexpected,
        # whereas this keeps the batch dimension and lets fc1 raise.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
def train_network(network, train_loder, optimizer, epoch):
    """Train ``network`` for one pass over ``train_loder``.

    Every 10th batch a progress line is printed, the batch loss is appended
    to the module-level ``train_loss`` list, and the number of samples seen
    so far is appended to the module-level ``train_counter`` list.

    Args:
        network: model to optimise; switched to training mode here.
        train_loder: iterable of ``(data, target)`` batches that also
            exposes ``.dataset`` and ``len()``.
        optimizer: optimizer bound to ``network``'s parameters.
        epoch: epoch number; treated as 1-based when computing the sample
            counter (``epoch - 1`` full passes are assumed to precede it).
    """
    network.train()
    # Build the loss module once instead of once per batch.
    criterion = nn.CrossEntropyLoss()
    num_samples = len(train_loder.dataset)
    num_batches = len(train_loder)

    for batch_idx, (data, target) in enumerate(train_loder):
        optimizer.zero_grad()
        output = network(data)
        # Cross-entropy on raw logits (log-softmax is applied by the loss).
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            loss_value = loss.item()
            info = 'Train epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), num_samples,
                100.0 * batch_idx / num_batches, loss_value
            )
            print(info)
            train_loss.append(loss_value)
            train_counter.append(
                batch_idx * batch_size_train + (epoch - 1) * num_samples
            )
def test_model(network, test_loader):
    """Evaluate ``network`` on ``test_loader`` and report loss and accuracy.

    The per-sample average loss is appended to the module-level
    ``test_loss`` list and a summary line is printed.

    Args:
        network: model to evaluate; switched to evaluation mode here.
        test_loader: iterable of ``(data, target)`` batches that also
            exposes ``.dataset``.
    """
    network.eval()
    # Sum the loss over samples so that dividing by the dataset size below
    # gives a true per-sample average; the default 'mean' reduction would be
    # averaged a second time and under-report the loss.
    criterion = nn.CrossEntropyLoss(reduction='sum')
    num_samples = len(test_loader.dataset)
    total_loss = 0.0
    correct = 0

    with torch.no_grad():
        for data, target in test_loader:
            output = network(data)
            total_loss += criterion(output, target).item()
            # Index of the highest logit is the predicted class.
            prediction = output.argmax(dim=1, keepdim=True)
            # .item() keeps ``correct`` a plain int so it prints as a number.
            correct += prediction.eq(target.view_as(prediction)).sum().item()

    # average loss
    t_loss = total_loss / num_samples
    test_loss.append(t_loss)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        t_loss, correct, num_samples,
        100.0 * correct / num_samples
    ))
def main():
    """Build the model, optimizer and data loaders, then train and evaluate.

    The untrained network is evaluated once as a baseline, after which each
    epoch runs one training pass followed by one evaluation pass. Relies on
    module-level settings (``learning_rate``, ``momentum``,
    ``batch_size_train``, ``batch_size_test``, ``epochs``) and data arrays
    (``x_train``, ``y_train``, ``x_test``, ``y_test``).
    """
    # initialize network
    network = Network()
    # NOTE: Adam with the same learning rate was tried and reported to give
    # very bad performance, hence plain SGD.
    optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)

    # Same conversion for both splits, so build it once.
    to_tensor = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor()
    ])
    training_ds = HandGestureDataset(x_train, y_train, training=True,
                                     transform=to_tensor)
    test_ds = HandGestureDataset(x_test, y_test, training=False,
                                 transform=to_tensor)

    training_loader = torch.utils.data.DataLoader(
        training_ds, batch_size=batch_size_train, shuffle=True)
    # Evaluation does not depend on sample order, so skip shuffling.
    test_loader = torch.utils.data.DataLoader(
        test_ds, batch_size=batch_size_test, shuffle=False)

    # Baseline evaluation before any training.
    test_model(network, test_loader)
    for i in range(1, epochs + 1):
        train_network(network, training_loader, optimizer, i)
        test_model(network, test_loader)
The PyTorch version isn’t learning at all; the loss isn’t going down. The learning rate is 0.01 and the momentum is 0.0, which, according to the Keras documentation, are the default values for its SGD optimizer.