# Difficulty Replicating Simple Binary Classification Tensorflow Results in PyTorch

Hello,

I am trying to train a model in PyTorch, which I have successfully trained in Tensorflow. However, in PyTorch, the model achieves random accuracy (it is a binary classification task). In Tensorflow, the performance is 100% (the task is easy). I’ve been working on this for over a week now, and cannot understand what the problem is.

The task is simple: Given an input sentence that describes a coordinate location, and a X coordinate, is the coordinate matching the sentence described? For example, if you are given a sentence saying “X coordinate is positive”, then the model should predict positive if the input X coordinate is > 0, else negative. Likewise, if the input sentence is “X coordinate is negative”, then the model should predict positive if the input X coordinate is < 0, else negative.

To simulate this for debugging, I created a very simple example with two sentences tokenized as `[1, 5, 6, 8]` and `[1, 5, 6, 9]`. For x coordinates, they can be either -0.5 or 0.5. If the sentence is `[1, 5, 6, 8]`, then x should be 0.5 for a positive prediction, else the model should predict negative. The reverse applies for `[1, 5, 6, 9]`.

The code for this task is as follows. Any help would be greately appreciated!

``````import torch
import torch.nn as nn
import numpy as np
import sklearn.metrics as skm
import torch.nn.functional as F
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

n_epochs = 10
input_dim = 4
hidden_dim = 128
layer_dim = 2
output_dim = 2
batch_size = 50

class FeatureDataSet(torch.utils.data.Dataset):

def __init__(self, x_train, y_train, x_coordinates):
self.x_train = torch.tensor(x_train, dtype=torch.long)
self.y_train = torch.tensor(y_train)
self.x_coordinates = torch.tensor(x_coordinates, dtype=torch.float32)
def __len__(self):
return len(self.y_train)
def __getitem__(self, idx):
return self.x_train[idx], self.y_train[idx], self.x_coordinates[idx]

class RNN(nn.Module):

def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, batch_size):
super().__init__()
self.hidden_dim = hidden_dim
self.layer_dim = layer_dim

# linear layer to encode the coordinate
self.encode_x = nn.Linear(1, hidden_dim).cuda()
self._embeddings = nn.Embedding(40, 100).cuda()

# hidden_dim is 128
# layer_dim is 2
self.lstm = nn.LSTM(100, hidden_dim, layer_dim, batch_first=True).cuda()
self.fc = nn.Linear(2 * hidden_dim, output_dim).cuda()
self.batch_size = batch_size
self.hidden = None

def init_hidden(self, x):
h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
return [t.cpu() for t in (h0, c0)]

def forward(self, x, x_coordinate):
#initializing the hidden states
h0, c0 = self.init_hidden(x)
embeds = self._embeddings(x)
out, (hn, cn) = self.lstm(embeds.cuda(), (h0.cuda(), c0.cuda()))

x_embed = F.relu(self.encode_x(x_coordinate.cuda().to(torch.float32)).cuda())
representations_so_far_added = torch.cat([out[:, -1, :], x_embed], dim=1)

return out

model = RNN(input_dim, hidden_dim, layer_dim, output_dim, batch_size)
criterion = nn.CrossEntropyLoss()

x_train = []
x_coordinates = []
y_train = []
for i in range(10000):
# create the data. if x_coordinate is 0.5 and the sentence says that (represented by [1, 5, 6, 8]), then we should predict positive else negative (if the x_coordinate == -0.5)
# same applies if the x_coordinate == -0.5, just that the sentence is now [1, 5, 6, 9]
if np.random.randint(0, 2) == 0:
if np.random.randint(0, 2) == 0:
# x coordinate > 0
x_train.append([1, 5, 6, 8])
x_coordinates.append([0.5])
y_train.append(1.0)
else:
# x coordinate > 0 negative
x_train.append([1, 5, 6, 8])
x_coordinates.append([-0.5])
y_train.append(0.0)
else:
if np.random.randint(0, 2) == 0:
# x coordinate < 0
x_train.append([1, 5, 6, 9])
x_coordinates.append([-0.5])
y_train.append(1.0)
else:
# x coordinate < 0 negative
x_train.append([1, 5, 6, 9])
x_coordinates.append([0.5])
y_train.append(0.0)

# print a sample of data
print(x_train[:10])
print(y_train[:10])
print(x_coordinates[:10])

trainingDataset = FeatureDataSet(x_train=x_train, y_train=y_train, x_coordinates=x_coordinates)

# for each epoch
for epoch in range(1, n_epochs + 1):
acc_all = []
# each batch
for i, (x_batch, y_batch, x_coord_batch) in enumerate(train_loader):
x_batch = x_batch.to(device)
y_batch = y_batch.to(device)

x_coord_batch = x_coord_batch.to(device)

# pass in the text (x_batch) and coordinate (x_coord_batch)
out = model(x_batch, x_coordinate=x_coord_batch)
loss = criterion(out.float(), y_batch.type(torch.LongTensor).cuda())
loss.backward()
opt.step()

pred_idx = F.log_softmax(out, dim=1)
target_labels = y_batch.cpu().int()
pred_labels = torch.argmax(pred_idx, dim=-1).cpu().data.int()

curr_acc = skm.accuracy_score(target_labels, pred_labels)
acc_all.append(curr_acc)

# this is 50% for me, which is effectively random
print(np.mean(acc_all))
``````

I don’t understand why the performance is random!