Hi!
I created a model to classify chess positions as a good move for black or white
I tried training the model but the validation accuracy almost stays constant and the loss is oscillating
A log of the Accuracy and loss:
Epoch 0 Accuracy 36.0772 Loss 4.301493
Epoch 1 Accuracy 36.0518 Loss 3.896801
Epoch 2 Accuracy 36.0772 Loss 4.297735
Epoch 3 Accuracy 36.1280 Loss 3.851907
Epoch 4 Accuracy 36.0010 Loss 3.515308
Epoch 5 Accuracy 36.0772 Loss 3.375627
Epoch 6 Accuracy 36.0264 Loss 3.823340
Epoch 7 Accuracy 36.0518 Loss 3.453476
Epoch 8 Accuracy 36.1026 Loss 3.087875
Epoch 9 Accuracy 36.0518 Loss 4.504724
Epoch 10 Accuracy 36.1535 Loss 3.944135
Epoch 11 Accuracy 35.9502 Loss 4.940733
Epoch 12 Accuracy 36.0518 Loss 4.199282
Epoch 13 Accuracy 36.1280 Loss 4.538399
Epoch 14 Accuracy 36.0010 Loss 3.787019
Epoch 15 Accuracy 35.9756 Loss 3.599001
Epoch 16 Accuracy 36.0010 Loss 4.977734
Epoch 17 Accuracy 35.9756 Loss 4.441586
Epoch 18 Accuracy 36.1026 Loss 4.767806
Epoch 19 Accuracy 36.1535 Loss 4.689194
Epoch 20 Accuracy 36.2043 Loss 3.324705
Epoch 21 Accuracy 36.0264 Loss 3.859550
Epoch 22 Accuracy 36.0772 Loss 3.823349
Epoch 23 Accuracy 36.0264 Loss 3.766785
Epoch 24 Accuracy 36.0772 Loss 3.660792
Epoch 25 Accuracy 36.1026 Loss 4.057627
Epoch 26 Accuracy 36.0264 Loss 4.357696
model.py
class Network(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1,64,4,2,1)
self.conv2 = nn.Conv2d(64,64,2,1,1)
self.fc1 = nn.Linear(64*5*5,1024)
self.fc2 = nn.Linear(1024,3)
def forward(self,x):
x = x.view(-1,1,8,8)
x = F.relu(self.conv1(x))
x = F.relu(self.conv2(x))
x = x.view(-1,64*25)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
return x
def load(f):
self.state_dict = torch.load(f).state_dict()
self.eval()
train.py
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader,ConcatDataset
torch.manual_seed(0)
from data import Data
from model import Network
from utils import *
net = Network()
total_train = []
total_test = []
dir_='data/processed/'
for f in os.listdir(dir_):
train = Data(dir_+f,train=True)
test = Data(dir_+f,train=False)
total_train.append(train)
total_test.append(test)
train = ConcatDataset(total_train)
test = ConcatDataset(total_test)
trainloader = DataLoader(train,batch_size=128,shuffle=True)
testloader = DataLoader(test,batch_size=32,shuffle=True)
print(f'Name {" "*10}|len\nTest {" "*10}|{len(test)}\nTrin {" "*10}|{len(train)}')
epochs = 500
lr = 0.0001
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters())
pacc = 0
for e in range(epochs):
net.train()
for x,y in trainloader:
optimizer.zero_grad()
p = net(x)
loss = loss_fn(p,torch.argmax(y,dim=1))
optimizer.step()
net.eval()
acc = []
with torch.no_grad():
for x,y in testloader:
x = x.unsqueeze(1)
p = net(x)
a = accuracy(p,torch.argmax(y,dim=1))
acc.append(a)
acc = torch.mean(torch.Tensor(acc))
print(f'Epoch {e} Accuracy {acc:.4f} Loss {loss:5f}')
if acc > pacc:
torch.save(net,f'acc_{acc:.4f}')
pacc = acc
the data is chess data from the “figs game database”
it is preprocessed as follows
import bz2
import re
import chess
import numpy as np
import bz2
import os
class DataBuilder:
def __init__(self,dir_,save_dir='data/processed'):
self.dir = dir_
self.data={'x':[],'y':[]}
self.save_dir = save_dir
def remove_metadata(self,f):
print(type(f))
for idx,line in enumerate(f.split('\n')):
if line[:2] == '1.' and "forfeits by disconnection" not in line:
end = line.index('{')
pline = line[:end].replace('+','').replace('#','')
pline = re.sub(r'[1-9][0-9]*\.\s', '', pline)
result = float(0 if '1-0' in line[end:]
else 1 if '0-1' in line[end:]
else 0.5)
self.data['x'].append(pline)
self.data['y'].append(result)
def read_bz2(self,f):
f = bz2.open(f,'r')
f = f.read().decode()
return f
def read(self,f):
f = open(f,'r')
f = f.read()
return f
def one_hot(self):
piece_dict = {
'p':-1,
'r':-5,
'n':-3,
'b':-3.5,
'q':-9,
'k':-999,
'P':1,
'R':5,
'N':3,
'B':3.5,
'Q':9,
'K':999,
'.':0
}
result_dict = {
0:[1,0,0],
1:[0,0,1],
0.5:[0,1,0]
}
def make_matrix(board):
board = board.epd().split(' ',1)[0].split('/')
b = []
for row in board:
for cell in row:
if cell.isdigit():
for _ in range(int(cell)):
b.append(piece_dict['.'])
else:
b.append(piece_dict[cell])
return b
for idx,(x,y) in enumerate(zip(self.data['x'],self.data['y'])):
board = chess.Board()
moves = x.split()
for move in moves:
board.push_san(move)
self.data['x'][idx] = make_matrix(board)
self.data['y'][idx] = result_dict[y]
def process(self):
for f in os.listdir(self.dir):
if f.endswith('bz2'):
self.remove_metadata(self.read_bz2(self.dir+f))
else:
self.remove_metadata(self.read(self.dir+f))
self.one_hot()
self.save(f)
self.data={'x':[],'y':[]}
def save(self,f):
np.save(f'{self.save_dir}/{f}_processed',self.data)
builder = DataBuilder('data/raw/')
builder.process()
torch dataset
import numpy as np
import torch
from torch.utils.data import Dataset
class Data(Dataset):
def __init__(self,data=None,train=True,tratio=0.005):
data = np.load(data,allow_pickle=1).tolist()
self.x = data['x']
self.y = data['y']
tnum = int(tratio*len(self.y))
if train:
self.x = self.x[:len(self.y)-tnum]
self.y = self.y[:len(self.y)-tnum]
else:
self.x = self.x[-tnum:]
self.y = self.y[-tnum:]
def __getitem__(self,idx):
x = torch.Tensor(self.x[idx]).T
y = torch.Tensor(self.y[idx])
return x,y
def __len__(self):
return len(self.y)
utils.py
def accuracy(p,y):
return round(100*(sum(torch.argmax(p,dim=1) == y)/len(y)).item(),4)
I would be glad if somone checks this out
I have been working on this for three days. I tried changing the model architecture and changing the learning rate