Hi,

I’m having trouble getting a classifier working. It’s based on a tensorflow application (unfortunately I can’t release the source for examination) which predicts a binary class label for multiple targets.

The problem is that the input data has a high proportion of missing values. These are marked with their own label, which is supposed to be ignored in the loss function. I’ve tried `nll_loss` with its built-in `ignore_index` option, and also BCE with the network outputs multiplied by zero wherever the value is missing - neither seems to work.

I suspect I’m making a mistake somewhere, but I’m fairly new to PyTorch and can’t seem to figure it out. The network doesn’t appear to be treating each task independently - the average ROC AUC across all tasks in the original program is ~0.7-0.8, whereas this version doesn’t get above ~0.55. Below is the (relevant, fairly verbose) code:

```
# Convert the raw feature/label collections into tensors.
train_features = torch.from_numpy(np.array(train_features))
train_labels = torch.from_numpy(np.array(train_labels))
test_features = torch.from_numpy(np.array(test_features))
test_labels = torch.from_numpy(np.array(test_labels))

# Targets arrive in {-1, 0, 1}, where 0 marks an unknown datapoint. Shift
# them to class indices {0, 1, 2}, so the unknown label becomes class 1.
train_labels += 1
test_labels += 1

# Training data: one shuffled mini-batch loader, plus one loader that yields
# the whole training set as a single, unshuffled batch.
train = torch.utils.data.TensorDataset(train_features.float(), train_labels.long())
train_loader = torch.utils.data.DataLoader(train, batch_size=batch, shuffle=True)
train_loader_2 = torch.utils.data.DataLoader(
    train, batch_size=train_features.shape[0], shuffle=False
)

# Test data in mini-batches.
test = torch.utils.data.TensorDataset(test_features.float(), test_labels.long())
test_loader = torch.utils.data.DataLoader(test, batch_size=batch, shuffle=False)

# AUROC data: turn each class index into a one-hot vector over classes
# (0, 1, 2). Broadcasting the comparison against the class axis yields the
# [record, task, class] layout directly.
class_ids = np.arange(3)
rocauc_labels = torch.from_numpy(
    (np.array(test_labels)[:, :, None] == class_ids).astype(np.int32)
)
rocauc_set = torch.utils.data.TensorDataset(test_features.float(), rocauc_labels.long())
rocauc_loader = torch.utils.data.DataLoader(
    rocauc_set, batch_size=test_features.shape[0], shuffle=False
)
# Build the network from the project library: input width taken from the
# feature matrix, three hidden widths of 1024, and one output per task.
layer_widths = [train_features.shape[1], 1024, 1024, 1024, train_labels.shape[1]]
net = Network.FC_FF_RELU_NN(layer_widths)

# Negative log-likelihood loss. It expects log-probabilities, which the
# network produces with a log_softmax in its final layer.
criterion = F.nll_loss
optimizer = torch.optim.SGD(
    net.parameters(),
    lr=learning_rate,
    momentum=momentum,
    weight_decay=L2loss,
)

# Containers for the statistics gathered during the run.
listofloss = []
listofclasslls = []
listofclassrocaucs = []
listofaverageclassll = []
listofaveragerocauc = []
listofactives = []
# Count, for every task, how many training records carry the "active" class
# index (2). train_loader_2 yields the full training set as one batch.
for features, labels in train_loader_2:
    print(labels.shape)
    # Column-wise sum of the boolean mask gives one count per task. The mask
    # is cast to long before summing so the accumulator cannot overflow.
    listofactives.extend((labels == 2).long().sum(dim=0).tolist())
print(listofactives)
# Train with mini-batch SGD. Every second epoch, record the average loss and
# score the test set with a per-task ROC AUC computed over known labels only.
num_batches = len(train_loader)
for epoch in range(num_epochs):
    epochloss = 0.
    for features, labels in train_loader:
        # Labels are integer class indices and must not require gradients;
        # the inputs do not need them either, only the network parameters do.
        optimizer.zero_grad()
        outputs = net(features, droprate)  # dims: [record, task, ln(classprob)]
        num_tasks = outputs.size(1)

        task_losses = []
        for k in range(num_tasks):
            task_labels = labels[:, k]
            # nll_loss averages over the non-ignored targets, so a task with
            # no known label in this batch would give 0/0 = NaN and poison
            # every weight on the backward pass. Skip such tasks.
            if not bool((task_labels != 1).any()):
                continue
            # Class 1 (unknown) is ignored; the loss covers known records only.
            task_losses.append(
                criterion(outputs[:, k, :], task_labels, ignore_index=1)
            )
        if not task_losses:
            # Nothing known in this batch: no gradient signal to apply.
            continue

        loss = sum(task_losses) / num_tasks  # average loss over all tasks
        epochloss += loss.item()
        loss.backward()
        optimizer.step()

    # Average over the number of batches (not the last batch index).
    epochloss /= max(num_batches, 1)

    # NOTE(review): the original indentation was lost; evaluation is assumed
    # to share the every-other-epoch schedule of the loss logging - confirm.
    if epoch % 2 == 0:
        listofloss.append([epoch, epochloss])
        print('Epoch = {}, average epoch loss = {}'.format(epoch, epochloss))
        for tsfeatures, tslabels in rocauc_loader:
            with torch.no_grad():
                tsoutputs = net(tsfeatures, 0).numpy()
            onehot = tslabels.numpy()  # dims: [record, task, class]

            classrocaucs = [epoch]  # first entry is the epoch number
            for task in range(onehot.shape[1]):
                # Keep only the records whose label is not "unknown" (class 1).
                known = onehot[:, task, 1] != 1
                # Score = predicted probability of class 2.
                knownpreds = np.exp(tsoutputs[known, task, 2])
                knowntars = onehot[known, task, 2]
                classrocaucs.append(roc_auc_score(knowntars, knownpreds))

            averagerocauc = sum(classrocaucs[1:]) / (len(classrocaucs) - 1)
            listofclassrocaucs.append(classrocaucs)
            listofaveragerocauc.append([epoch, averagerocauc])
            print("Average AUROC: {}".format(averagerocauc))
```

NN code:

```
class FC_FF_RELU_NN(nn.Module):
    """Fully connected ReLU network with three parallel linear output heads.

    The three heads each map the last hidden representation to one value per
    task. Their outputs are stacked along a new last axis and normalised
    with ``log_softmax``, so the result holds three log-probabilities per
    (record, task) pair, suitable for ``nll_loss``.
    """

    def __init__(self, ListOfLayers):
        """Build the layers.

        Args:
            ListOfLayers: sequence of ints giving the input width, the hidden
                widths, and the output width (number of tasks), in order.
        """
        super(FC_FF_RELU_NN, self).__init__()
        self.nb_layers = len(ListOfLayers)
        # One Linear layer per consecutive pair of widths, excluding the
        # output width, which is handled by the heads below.
        self.fc = nn.ModuleList(
            [
                nn.Linear(n_in, n_out)
                for n_in, n_out in zip(ListOfLayers[:-2], ListOfLayers[1:-1])
            ]
        )
        # "active", "inactive" and "unknown" are placeholder names; they may
        # not correspond to the class indices of the same meaning.
        self.active = nn.Linear(ListOfLayers[-2], ListOfLayers[-1])
        self.inactive = nn.Linear(ListOfLayers[-2], ListOfLayers[-1])
        self.unknown = nn.Linear(ListOfLayers[-2], ListOfLayers[-1])

    def forward(self, x, droprate):
        """Return log-probabilities of shape ``x.shape[:-1] + (tasks, 3)``.

        Args:
            x: input features; the last dimension must equal the input width.
            droprate: dropout probability applied after every hidden layer.
                Dropout is only active while the module is in training mode,
                so ``net.eval()`` disables it regardless of this value.
        """
        # NOTE(review): the original indentation was lost; dropout is assumed
        # to follow every hidden layer rather than only the last - confirm.
        for layer in self.fc:
            x = F.relu(layer(x))
            x = F.dropout(x, p=droprate, training=self.training)
        # Class axis order is [active, unknown, inactive], i.e. the
        # "unknown" head sits at class index 1.
        heads = [self.active(x), self.unknown(x), self.inactive(x)]
        return F.log_softmax(torch.stack(heads, dim=-1), dim=-1)
```

Any help would be greatly appreciated, this has been driving me nuts for a couple of weeks now.