My code looks like this:

```
class SudokuSolver(nn.Module):
    """Predict a Sudoku board one digit at a time with a single linear layer.

    The board is a flat vector of ``hidden_size`` features; the layer maps it
    to a probability distribution over ``out_size`` digit classes.
    """

    def __init__(self, in_size, hidden_size, out_size):
        super().__init__()
        self.in_size = in_size          # number of cells to predict per board
        self.hidden_size = hidden_size  # width of the input vector
        self.out_size = out_size        # number of digit classes
        # A hidden nn.Linear(in_size, hidden_size) was planned but is
        # currently disabled -- for now the model is one linear layer.
        self.out = nn.Linear(hidden_size, out_size)

    def forward(self, x):
        # Class probabilities per board in the batch, shape (batch, out_size).
        return torch.softmax(self.out(x), dim=-1)

    def train(self, train_data, train_labels,
              verbose=100, epochs=100, lr=0.1, l2_weight=0,
              validation_data=None, validation_labels=None):
        """Fit the model cell by cell with teacher forcing.

        For each epoch and each cell ``n`` the model predicts a digit
        distribution, takes a negative-log-likelihood step on the true digit
        of cell ``n``, then writes that true digit back into ``train_data``
        (in place) so later cells see a partially solved board.

        NOTE(review): this overrides ``nn.Module.train(mode)``, so toggling
        train/eval mode via ``model.train()`` / ``model.eval()`` no longer
        works; consider renaming to ``fit`` in a future API revision.
        ``validation_data`` / ``validation_labels`` are accepted but not
        used yet.

        Returns the per-epoch training losses as a list of floats.
        """
        optim = torch.optim.SGD(self.parameters(), lr=lr,
                                weight_decay=l2_weight)
        train_loss = []
        print('start')
        for e in range(epochs):
            for n in range(self.in_size):
                optim.zero_grad()
                output = self(train_data)
                # Probability the model assigns to the correct digit of
                # cell n for every board in the batch.
                idx = train_labels[:, n].long().unsqueeze(1)
                likelihood = output.gather(1, idx)
                # Clamp prevents log(0) -> -inf loss / NaN gradients when
                # the softmax output underflows.
                loss = -torch.log(likelihood.clamp_min(1e-12)).mean()
                loss.backward()
                optim.step()
                # Teacher forcing: reveal the true digit of cell n.  The
                # in-place write must happen outside autograd -- doing it
                # on a grad-tracked tensor is what crashed the original
                # right after the first backward().
                with torch.no_grad():
                    rows = torch.arange(train_data.shape[0])
                    train_data[rows, n] = train_labels[rows, n]
            train_loss.append(float(loss.detach()))
            if verbose != 0 and e % verbose == 0:
                print(loss.detach())
        return train_loss
```

I am trying to solve a Sudoku using only nn.Linear layers. My approach is to predict the board one digit at a time: after each prediction I add the correct digit to my training data and run the model again until the whole Sudoku is filled. It stops with an error right after the first loss.backward().