In "Number plate recognition with Tensorflow" (Matt's Ramblings), the author generates a dataset from background pictures and a number-plate font file; you can learn from it.
I've met a problem:
while training, from the 2nd minibatch on, the output of MultiLabelMarginLoss() is zero, and I can't find the reason.
The input image size is 224×224; the target vector width is 252 (7×36), e.g.
‘X’ versus 000000000000000000000001000000000000
‘A’ versus 100000000000000000000000000000000000
The source code is as follows:
import …
# Character set for plate recognition: letters first (indices 0-25),
# then digits (indices 26-35), matching the one-hot layout in the examples above.
DIGITS = "0123456789"
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
CHARS = LETTERS + DIGITS

NPLEN = 7                           # characters per number plate
# BUG FIX: smart quotes replaced with ASCII quotes (the pasted code would not
# parse); NUM_CLASSES derived instead of hard-coded so it cannot drift from
# NPLEN / CHARS.  7 * 36 = 252, same value as before.
NUM_CLASSES = NPLEN * len(CHARS)
class anprmodel(nn.Module):
    """CNN for number-plate recognition.

    Input:  (batch, 1, 224, 224) grayscale images.
    Output: (batch, num_classes) raw logits; num_classes defaults to 252,
    i.e. NPLEN=7 plate characters times 36 classes (A-Z, 0-9) concatenated.
    """

    def __init__(self, num_classes=252):
        # num_classes parameterized (was the hard-coded NUM_CLASSES constant);
        # the default keeps existing ``anprmodel()`` callers working unchanged.
        super(anprmodel, self).__init__()
        self.num_classes = num_classes
        # Three conv+pool stages, each halving the resolution:
        # 224 -> 112 -> 56 -> 28.
        self.conv1 = nn.Conv2d(1, 48, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(48, 64, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=5, padding=2)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(28 * 28 * 128, 2048)
        self.fc2 = nn.Linear(2048, self.num_classes)

    def forward(self, x):
        x = F.relu(self.pool1(self.conv1(x)))  # 224x224 -> 112x112
        x = F.relu(self.pool2(self.conv2(x)))  # 112x112 -> 56x56
        x = F.relu(self.pool3(self.conv3(x)))  # 56x56  -> 28x28
        # BUG FIX: flatten per-sample instead of view(-1, 28*28*128), which
        # could silently fold a size mismatch into a wrong batch dimension.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)  # raw logits; no activation, loss applied outside
class NPSET(torch_utils_data.Dataset):
    """Number-plate image dataset.

    Walks ``root`` for image files named ``xxxxxxxx_yyyyyyy_z.png``; the
    middle ``_``-separated field is the plate text used as the label.
    Images are loaded grayscale and resized to 224x224 at construction time.
    """

    picroot = 'np'  # default root; overridden by the ``root`` __init__ arg

    def __init__(self, root, data_transform=None):
        self.picroot = root
        self.data_transform = data_transform
        if not os.path.exists(self.picroot):
            # BUG FIX: "doesnot exists" typo in the error message
            raise RuntimeError('{} does not exist'.format(self.picroot))
        # BUG FIX: accumulate across the whole walk -- the original reset
        # imgs/labels on every directory visited, so only the last directory
        # survived and self.len was wrong for nested layouts.
        imgs = []
        labels = []
        for dirpath, _dirnames, filenames in os.walk(self.picroot):
            for filename in filenames:
                # BUG FIX: join against the directory being walked, not the
                # top-level root, so files in subdirectories load correctly.
                picfilename = os.path.join(dirpath, filename)
                im = cv2.imread(picfilename, cv2.IMREAD_GRAYSCALE)
                im = cv2.resize(im, (224, 224))
                imgs.append(im)
                m = filename.split('_')  # filename style: xxxxxxxx_yyyyyyy_z.png
                labels.append(m[1])
        self.dataset = imgs
        self.labels = labels
        self.len = len(imgs)  # BUG FIX: total count, not len() of the last dir

    def code_to_vec(self, p, code):
        """Encode *code* (a string over CHARS) as a flattened one-hot matrix
        of shape (len(code) * len(CHARS),).

        ``p`` is unused; kept for interface compatibility with callers.
        """
        def char_to_vec(c):
            y = np.zeros((len(CHARS),))
            y[CHARS.index(c)] = 1.0
            return y
        return np.vstack([char_to_vec(c) for c in code]).flatten()

    def __getitem__(self, index):
        label, img = self.labels[index], self.dataset[index]
        if self.data_transform is not None:
            img = self.data_transform(img)
        labelarray = self.code_to_vec(1, label)
        return (img, labelarray)

    def __len__(self):
        return self.len
def accuracy(output, target):
    """Count fully-correct plate predictions in a batch.

    ``output`` and ``target`` are (batchsize, NPLEN*36) tensors whose rows are
    NPLEN concatenated per-character segments (scores for output, one-hot for
    target).  A sample counts as right only when every character position's
    argmax matches.

    Returns ``(batchsize, num_right)`` where ``num_right`` is a 0-dim tensor,
    as in the original chunk/cat implementation.
    """
    batchsize = output.size(0)
    assert batchsize == target.size(0)
    # BUG FIX / consistency: the original mixed the literal 7 with NPLEN;
    # derive the segment width once and use NPLEN throughout.
    width = output.size(1) // NPLEN  # 36 classes per character position
    # Per-character argmax via a single reshape, replacing the original
    # chunk -> max -> cat -> index_select dance (same indices, same result).
    _, pred = torch.max(output.view(batchsize, NPLEN, width), 2)
    _, truth = torch.max(target.view(batchsize, NPLEN, width), 2)
    # matches[i] == NPLEN iff every character of sample i is correct.
    matches = torch.eq(pred, truth).long().sum(1)
    result = torch.eq(matches, NPLEN)
    return batchsize, torch.sum(result)
class recMeter(object):
    """Running recognition-accuracy meter.

    Tracks total samples, total correct, the current accuracy, the best
    accuracy seen so far, and whether the latest update set a new best.
    """

    def __init__(self):
        # BUG FIX: the original defined ``init`` (no dunders), so the
        # constructor never ran and the counters were never created.
        self.is_best = False
        self.best = 0.0
        self.reset()

    def reset(self):
        """Clear the per-epoch counters (best/is_best are preserved)."""
        self.right = 0
        self.sum = 0
        # BUG FIX: the original assigned a local ``current`` instead of the
        # instance attribute.
        self.current = 0.0

    def updatecnt(self, n, r):
        """Accumulate a batch: ``n`` samples seen, ``r`` of them correct."""
        self.right += r
        self.sum += n

    def updateaccurate(self):
        """Recompute current accuracy and refresh the best-so-far flag.

        BUG FIX: the original referenced undefined names (``ac``, ``best``,
        ``is_best``) and raised NameError; all state is now on ``self``.
        float() guards against integer division under Python 2.
        """
        self.current = self.right / float(self.sum)
        if self.current > self.best:
            self.is_best = True
            self.best = self.current
        else:
            self.is_best = False
if __name__ == "__main__":  # BUG FIX: was ``if name == "main"`` (no dunders)
    model = anprmodel()
    model.cuda()
    cudnn.benchmark = True
    batch_size = 10
    # NOTE(review): Normalize runs after ToTensor(), which scales pixels to
    # [0, 1]; a mean of ~107.9 and std of ~3893.6 look like raw-pixel (or
    # variance rather than std) statistics -- verify these values.
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((107.897212036,), (3893.57887653,)),
    ])
    npset = NPSET(root='/home/wang/git/nppic/nproot/plate', data_transform=data_transform)
    nploader = torch.utils.data.DataLoader(npset, batch_size=batch_size, shuffle=True, num_workers=1)  # train
    npvalset = NPSET(root='/home/wang/git/nppic/npval/plate', data_transform=data_transform)
    npvalloader = torch.utils.data.DataLoader(npvalset, batch_size=batch_size, shuffle=False, num_workers=1)  # validate
    criterion = nn.MultiLabelMarginLoss()
    optimizer = torch.optim.SGD(model.parameters(), 0.1, momentum=0.9)
    meter = recMeter()

    def _to_margin_target(multihot):
        """Convert a (B, 252) 0/1 multi-hot batch to the index format that
        MultiLabelMarginLoss requires: each row lists the positive class
        indices first and is padded with -1.

        BUG FIX (the reported zero-loss problem): feeding the raw 0/1 vector
        makes the loss interpret the 0s and 1s themselves as class indices,
        so after one SGD step the margin is trivially satisfied and the loss
        collapses to 0.  NOTE(review): with 7 independent characters, a
        per-segment nn.CrossEntropyLoss is usually the better objective.
        """
        arr = multihot.cpu().numpy()
        out = -np.ones(arr.shape, dtype=np.int64)
        for row, sample in enumerate(arr):
            idx = np.nonzero(sample)[0]
            out[row, :len(idx)] = idx
        return torch.from_numpy(out)

    for epoch in range(0, 1):
        # Sets the learning rate to the initial LR decayed by 10 every 30 epochs
        lr = 0.1 * (0.1 ** (epoch // 30))
        # for param_group in optimizer.param_groups:
        #     param_group['lr'] = lr
        # train
        model.train()
        for i, data in enumerate(nploader):
            inputs, targets = data                 # inputs: batchsize*224*224
            inputs = torch.unsqueeze(inputs, 1)    # -> batchsize*1*224*224
            # BUG FIX: index-format target instead of casting the 0/1 vector
            targets = _to_margin_target(targets)
            targets = targets.cuda()
            inputs = inputs.cuda()
            input_var = torch.autograd.Variable(inputs)
            target_var = torch.autograd.Variable(targets)
            optimizer.zero_grad()
            output_var = model(input_var)
            # process loss
            character_loss = criterion(output_var, target_var)
            # compute gradient and do SGD step
            character_loss.backward()
            optimizer.step()
I've executed the training loop in the Python console this way:
npiter=iter(nploader)
then
(inputs,targets)=npiter.next()
inputs=torch.unsqueeze(inputs,1)
targets=torch.LongTensor(np.array(targets.numpy(),np.long))
targets=targets.cuda()
inputs=inputs.cuda()
input_var=torch.autograd.Variable(inputs)
target_var=torch.autograd.Variable(targets)
optimizer.zero_grad()
output_var=model(input_var)
character_loss=criterion(output_var,target_var)
character_loss.backward()
optimizer.step()
print(‘Loss: {:.6f}’.format(character_loss.data[0]))
From the 2nd minibatch on, the loss becomes 0.
I'm stuck here.