After a daylong struggle,I finally made somewhat equivalent code in pytorch:
Now code runs fine however result predicted result (array of 10 in this case) is compared to target (label), it is a horribly mismatch. The tensorflow code predicted perfectly the classes. Obviously I am doing something wrong. Here is my much improved torch equivalent of fashion mnist code along its output:
import torch
import torch.nn as nn
import helper
import sys
import time
import re
import numpy as np
import matplotlib as plt
DEBUG=0
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
CONFIG_EPOCHS=2
CONFIG_BATCH_SIZE=64
for i in sys.argv:
print("Processing ", i)
try:
if re.search("epochs", i):
CONFIG_EPOCHS=int(i.split('=')[1])
if re.search("batch_size", i):
CONFIG_BATCH_SIZE=int(i.split('=')[1])
except Exception as msg:
print(msg)
print("No argument provided, default values will be used.")
print("epochs: ", CONFIG_EPOCHS)
print("batch_size: ", CONFIG_BATCH_SIZE)
labels_map = {0 : 'T-Shirt', 1 : 'Trouser', 2 : 'Pullover', 3 : 'Dress', 4 : 'Coat', 5 : 'Sandal', 6 : 'Shirt',
7 : 'Sneaker', 8 : 'Bag', 9 : 'Ankle Boot'};
trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=ToTensor())
training_data = datasets.FashionMNIST(
root="data",
train=True,
download=True,
transform=ToTensor()
)
trainloader = torch.utils.data.DataLoader(training_data, batch_size=CONFIG_BATCH_SIZE, shuffle = True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle = True)
print("training_data/test_data: ", type(training_data), len(training_data), type(test_data), len(test_data))
print("type: ", type(training_data[0]))
print("trainloader: ", type(trainloader))
print("testloader: ", type(testloader))
f1=nn.Flatten()
l1=nn.Linear(28*28, 300)
r1=nn.ReLU()
l2=nn.Linear(300, 100)
r2=nn.ReLU()
l3=nn.Linear(100, 30)
s3=nn.Softmax()
model = nn.Sequential(\
f1,
l1,
r1,
l2,
r2,
l3,
s3, \
)
print("Model: ", model)
#print("model: layer0: ", model[0], model[0].weight)
print("l1 info: ", l1, l1.weight.shape)
print("l2 info: ", l2, l2.weight.shape)
print("l3 info: ", l3, l3.weight.shape)
#criterion = torch.nn.MSELoss()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)
i=0
for epoch in range(CONFIG_EPOCHS):
print("")
print('epoch/i: ', epoch, i, end=' ', flush=True)
j=0
for batch in trainloader:
imgs, lbls = batch
if j == 0:
bypass_dots=int(len(training_data)/len(lbls)/80)
print("batch: ", type(batch), ", ", len(batch))
print("imgs: ", type(imgs), ", ", len(imgs), imgs.shape)
print("lbls: ", type(lbls), ", ", len(lbls), lbls.shape)
if DEBUG:
print("bypass_dots quantity: ", bypass_dots)
if DEBUG:
print("batch: ", type(batch), ", ", len(batch))
print("imgs: ", type(imgs), ", ", len(imgs), imgs.shape)
print("lbls: ", type(lbls), ", ", len(lbls), lbls.shape)
# Forward pass: Compute predicted y by passing x to the model
y_pred = model(imgs)
if DEBUG:
print("y_pred: ", type(y_pred), y_pred.shape)
print("lbls: ", type(lbls), lbls.shape)
# Compute and print loss
loss = criterion(y_pred, lbls)
if DEBUG:
print('epoch/batch: ', epoch, i,' loss: ', loss.item())
if j%bypass_dots == 0:
print(".", end='', flush=True)
# Zero gradients, perform a backward pass, and update the weights.
optimizer.zero_grad()
# perform a backward pass (backpropagation)
loss.backward()
# Update the parameters
optimizer.step()
j+=1
i+=1
print("Testing...")
i=0
print(len(testloader), type(testloader))
i=0
#for batch in testloader:
# imgs, lbls = batch
SLICE=10
for imgs, lbls in testloader:
imgs1=imgs[:SLICE]
lbls1=lbls[:SLICE]
print("---", i, "---")
print("imgs: ", imgs.shape)
print("lbls: ", lbls.shape)
print("imgs1: ", imgs1.shape)
if i >= 0:
break
i+=1
y_pred=model(imgs1)
print("y_pred: ", y_pred.shape, type(y_pred))
print(y_pred)
_, pred_class = torch.max(y_pred, 1)
print("pred_class: ", pred_class)
print("lbls1: ", lbls1)
output:
root@nonroot-Standard-PC-i440FX-PIIX-1996:~/dev-learn/gpu/pytorch/port-effort-from-tflow-2nd# python3 p297.py epochs=5
Processing p297.py
Processing epochs=5
epochs: 5
batch_size: 64
/usr/local/lib/python3.6/dist-packages/torchvision/datasets/mnist.py:498: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:180.)
return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
training_data/test_data: torchvision.datasets.mnist.FashionMNIST 60000 torchvision.datasets.mnist.FashionMNIST 10000
type: <class 'tuple'>
trainloader: torch.utils.data.dataloader.DataLoader
testloader: torch.utils.data.dataloader.DataLoader
Model: Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=300, bias=True)
(2): ReLU()
(3): Linear(in_features=300, out_features=100, bias=True)
(4): ReLU()
(5): Linear(in_features=100, out_features=30, bias=True)
(6): Softmax(dim=None)
)
l1 info: Linear(in_features=784, out_features=300, bias=True) torch.Size([300, 784])
l2 info: Linear(in_features=300, out_features=100, bias=True) torch.Size([100, 300])
l3 info: Linear(in_features=100, out_features=30, bias=True) torch.Size([30, 100])
epoch/i: 0 0 batch: <class 'list'> , 2
imgs: <class 'torch.Tensor'> , 64 torch.Size([64, 1, 28, 28])
lbls: <class 'torch.Tensor'> , 64 torch.Size([64])
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/container.py:139: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
input = module(input)
......................................................................................
epoch/i: 1 1 batch: <class 'list'> , 2
imgs: <class 'torch.Tensor'> , 64 torch.Size([64, 1, 28, 28])
lbls: <class 'torch.Tensor'> , 64 torch.Size([64])
......................................................................................
epoch/i: 2 2 batch: <class 'list'> , 2
imgs: <class 'torch.Tensor'> , 64 torch.Size([64, 1, 28, 28])
lbls: <class 'torch.Tensor'> , 64 torch.Size([64])
......................................................................................
epoch/i: 3 3 batch: <class 'list'> , 2
imgs: <class 'torch.Tensor'> , 64 torch.Size([64, 1, 28, 28])
lbls: <class 'torch.Tensor'> , 64 torch.Size([64])
......................................................................................
epoch/i: 4 4 batch: <class 'list'> , 2
imgs: <class 'torch.Tensor'> , 64 torch.Size([64, 1, 28, 28])
lbls: <class 'torch.Tensor'> , 64 torch.Size([64])
......................................................................................Testing...
313 torch.utils.data.dataloader.DataLoader
--- 0 ---
imgs: torch.Size([32, 1, 28, 28])
lbls: torch.Size([32])
imgs1: torch.Size([10, 1, 28, 28])
y_pred: torch.Size([10, 30]) <class 'torch.Tensor'>
tensor([[2.4371e-03, 4.3862e-05, 2.1819e-03, 3.1150e-04, 3.9027e-03, 2.6907e-04,
...
2.6642e-03, 2.1615e-02, 1.3037e-02, 5.6110e-02, 6.3985e-04, 3.8399e-04,
1.1296e-03, 5.2669e-04, 1.1069e-03, 4.4081e-04, 3.0513e-04, 6.3576e-04,
7.3444e-04, 5.2634e-04, 9.4558e-04, 7.3277e-04, 7.7510e-04, 5.5766e-04,
5.8993e-04, 4.8865e-04, 5.8177e-04, 5.2099e-04, 8.9330e-04, 9.2164e-04]],
grad_fn=<SoftmaxBackward>)
pred_class: tensor([9, 9, 0, 9, 9, 4, 9, 1, 1, 4])
lbls1: tensor([9, 5, 3, 5, 7, 4, 5, 1, 1, 6])