I wrote a script to train conv net on mnist, but there are some problems.
The mnist dataset is from Kaggle (train.csv
). The reference codes are
listed in the head part of the code.
Problems:
(1) line 46
: Without this line, it will raise RuntimeError: expected Double tensor (got Float tensor)
.
The tutorial and example code don’t do like this…
46 convnet = convnet.double() #RuntimeError: expected Double tensor (got Float tensor)
(2) line 66
: will crash if I don’t explicitly assign True
to model.trainning
. The totorial and example code don’t do like this…
66 model.trainning = True #AttributeError: 'MnistConvNet' object has no attribute 'trainning'
(3) The model seems not learning (test accuracy still 0.11 after 100 iterations), but I didn’t find out why. The test accuracy should arise after merely several iterations (The TF example codes in Kaggle do so). Is the code wrong?
Thanks in advance!
Complete script:
1 # http://pytorch.org/tutorials/
2 # http://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html
3 # https://github.com/pytorch/examples/blob/master/mnist/main.py
4
5 import sys
6 import os
7
8 os.putenv('OPENBLAS_NUM_THREADS', '4')
9
10 import torch as th
11 import torch.nn.functional as thnf
12 import numpy as np
13 import pandas as pd
14 from sklearn.model_selection import train_test_split
15 print('-> Using TH', th.__version__)
16
17 ### Read Train-Val data and split ###
18 trainval = pd.read_csv("train.csv")
19 trainval_images = trainval.iloc[:, 1:].div(255)
20 trainval_labels = trainval.iloc[:, :1]
21 train_images, val_images, train_labels, val_labels = train_test_split(
22 trainval_images, trainval_labels, train_size=0.8, random_state=0)
23 print('-> train set shape', train_images.shape)
24 print('-> val set shape', val_images.shape)
25
26 ### Model ###
27 class MnistConvNet(th.nn.Module):
28 def __init__(self):
29 super(MnistConvNet, self).__init__()
30 self.conv1 = th.nn.Conv2d(1, 10, kernel_size=5)
31 self.conv2 = th.nn.Conv2d(10, 20, kernel_size=5)
32 self.conv2_drop = th.nn.Dropout2d()
33 self.fc1 = th.nn.Linear(320, 50)
34 self.fc2 = th.nn.Linear(50, 10)
35 def forward(self, x):
36 x = thnf.relu(thnf.max_pool2d(self.conv1(x), 2))
37 x = thnf.relu(thnf.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
38 x = x.view(-1, 320)
39 x = thnf.relu(self.fc1(x))
40 x = thnf.dropout(x, training=self.trainning)
41 x = self.fc2(x)
42 return x
43 #return thnf.log_softmax(x)
44
45 convnet = MnistConvNet()
46 convnet = convnet.double() #RuntimeError: expected Double tensor (got Float tensor)
47 crit = th.nn.CrossEntropyLoss()
48 optimizer = th.optim.Adam(convnet.parameters(), lr=1e-2)
49
50 ### Train and Val ###
51
52 def step_train(model, iteration):
53 i = iteration
54 batch_images = train_images.iloc[
55 (i*50)%33600:
56 (i+1)%672==0 and 33600 or ((i+1)*50)%33600].values
57 batch_labels = train_labels.iloc[
58 (i*50)%33600:
59 (i+1)%672==0 and 33600 or ((i+1)*50)%33600].values
60 batch_images = th.autograd.Variable(th.from_numpy(batch_images))
61 batch_labels = th.autograd.Variable(th.from_numpy(batch_labels))
62 batch_images = batch_images.resize(50, 1, 28, 28)
63 batch_labels = batch_labels.resize(50)
64
65 model.train()
66 model.trainning = True #AttributeError: 'MnistConvNet' object has no attribute 'trainning'
67 optimizer.zero_grad()
68 output = model(batch_images)
69 loss = crit(output, batch_labels)
70 loss.backward()
71 optimizer.step()
72
73 pred = output.data.max(1)[1]
74 correct = pred.eq(batch_labels.data).cpu().sum()
75 print('-> Iter {:5d} |'.format(i), 'loss {:7.3f} |'.format(loss.data[0]),
76 'Bch Train Accu {:.2f}'.format(correct / output.size()[1]))
77
78 def step_eval(model, iteration):
79 correct = 0
80 total = val_images.shape[0]
81 lossaccum = 0.
82 print('-> TEST @ {} |'.format(iteration), end='')
83 for i in range(0, val_images.shape[0], 50):
84 images = val_images.iloc[i:i+50].values
85 labels = val_labels.iloc[i:i+50].values
86 images = th.autograd.Variable(th.from_numpy(images))
87 labels = th.autograd.Variable(th.from_numpy(labels))
88 images = images.resize(50, 1, 28, 28)
89 labels = labels.resize(50)
90
91 model.eval()
92 model.trainning = False
93 output = model(images)
94 loss = thnf.nll_loss(output, labels)
95 lossaccum += loss.data[0]
96 pred = output.data.max(1)[1]
97 correct += pred.eq(labels.data).cpu().sum()
98 print('.', end=''); sys.stdout.flush()
99 print('|')
100 print('-> TEST @ {} |'.format(iteration),
101 'Loss {:7.3f} |'.format(lossaccum),
102 'Accu {:.2f}|'.format(correct / total))
103 exit()
104
105 for i in range(20000):
106 step_train(convnet, i)
107 if i>0 and i%100==0:
108 step_eval(convnet, i)
part of output
-> Using TH 0.1.12
-> train set shape (33600, 784)
-> val set shape (8400, 784)
-> Iter 0 | loss 2.286 | Bch Train Accu 0.60
-> Iter 1 | loss 2.340 | Bch Train Accu 0.70