I am working on a project where it is easy to obtain data averaged over a group of samples but difficult to obtain data on individual samples, and I would like to train neural networks to predict on individual samples. I would greatly appreciate help troubleshooting my PyTorch implementation.
Here is an outline of an equivalent problem:
- 10,000 boxes each contain between 1 and 10 objects of several different types (A, B, C)
- Each object has some value based on 100 features
- Only the average value of the objects in an entire box can be obtained for learning, not the values of individual objects (see the relationship written out after this list)
- The derivative of value w.r.t. features is also desired
- The goal is to train one value-predicting model for each type of object (e.g. network “A” is fit to predict the value of any A-type object)
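In other words, the per-type models are only ever supervised jointly, through the box mean. For a box containing N objects with features x_1, ..., x_N of types t_1, ..., t_N, the training target is

    value(box) = (1/N) * sum_i f_{t_i}(x_i)

where f_A, f_B, f_C are the per-type models; this is the relationship my Model.forward below computes.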

I am using one overarching network to backpropagate the box-level value through object-level (sub)networks. The arrangement of (sub)networks changes with every sample, corresponding to the contents of each box. Is this the correct approach?

Each box’s data is in a dictionary, with a key for each type of object. Under each key is a list of “feature” tensors, one for each object of that type. I suspect that reading these lists out of each dictionary is inefficient. Is there a better approach to loading this data, given that the amount of information varies for each sample?
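For concreteness, one box with (say) two A objects, one B object, and three C objects looks like this; the counts here are just an example, but the (1, 100) feature shape matches my placeholder data below:

import torch

box = {
    'A': [torch.rand(1, 100), torch.rand(1, 100)],  # one (1, 100) feature tensor per object
    'B': [torch.rand(1, 100)],
    'C': [torch.rand(1, 100), torch.rand(1, 100), torch.rand(1, 100)],
}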
I have attached my code below, and would appreciate any tips for improving it as well as alternative approaches to the problem.
import torch
import numpy as np


class Submodel(torch.nn.Module):
    def __init__(self, type_):
        """
        One submodel to handle each type of object.
        """
        super(Submodel, self).__init__()
        self.layers = [torch.nn.Linear(100, 15),
                       torch.nn.Linear(15, 15),
                       torch.nn.Softplus(),
                       torch.nn.Dropout(p=0.20),
                       torch.nn.Linear(15, 15),
                       torch.nn.Softplus(),
                       torch.nn.Dropout(p=0.20),
                       torch.nn.Linear(15, 1)]
        # register each layer so its parameters show up in model.parameters();
        # add_module() rejects names containing '.', hence the underscore
        for i, layer in enumerate(self.layers):
            self.add_module("{}_{}".format(type_, i), layer)

    def forward_with_derivatives(self, data):
        """
        Used once the network is fully trained. `data` must be created
        with requires_grad=True, otherwise data.grad will be None.
        """
        output = self.forward(data)
        output.backward()
        gradient = data.grad
        return output, gradient

    def forward(self, data):
        h = data
        for layer in self.layers:
            h = layer(h)
        return h
class Model(torch.nn.Module):
    """
    Only used for training, when only box-level data is available.
    """
    def __init__(self, types):
        super(Model, self).__init__()
        self.submodels = {}
        for type_ in types:
            submodel = Submodel(type_)
            print(submodel)
            self.submodels[type_] = submodel
            # register the submodel so its parameters are trained
            self.add_module("{}_module".format(type_), submodel)

    def forward(self, batch_data):
        batch_output = []
        for data in batch_data:  # one dictionary per box
            obj_values = []
            for type_, type_data in data.items():
                for obj_features in type_data:
                    obj_values.append(self.submodels[type_](obj_features))
            # a box's value is the mean of its objects' predicted values
            box_value = torch.cat(obj_values, dim=0).mean(0)
            batch_output.append(box_value)
        return torch.cat(batch_output, dim=0)
class NetworkHandler:
    def __init__(self, types):
        self.types = types
        self.indices = np.arange(10000)  # number of samples
        self.training_indices = []
        self.training_set = []
        self.testing_set = []
        self.model = None
        self.loss = torch.nn.MSELoss()
        # per-sample losses for testing (reduce=False is deprecated)
        self.batch_loss = torch.nn.MSELoss(reduction='none')
        self.dtype = torch.FloatTensor
    def placeholder_data(self):
        boxes = []
        for i in self.indices:
            box = {}
            for type_ in self.types:
                # between 1 and 10 objects of each type per box
                n_points = np.random.randint(1, 11)
                objs = []
                for j in np.arange(n_points):
                    feature = np.random.rand(1, 100)
                    tensor = torch.from_numpy(feature).type(self.dtype)
                    objs.append(tensor)
                box[type_] = objs
            boxes.append(box)
        # arbitrary 80-20 split
        self.data = boxes[:8000]
        self.data_test = boxes[8000:]
        self.training_indices = self.indices[:8000]
        box_values = np.random.rand(len(self.indices),) * 1000
        self.target = box_values[:8000]
        self.target_test = box_values[8000:]
        # 10 arbitrary minibatches for testing due to memory constraints
        self.test_batches = np.array_split(np.arange(len(self.data_test)), 10)
    def train(self):
        np.random.shuffle(self.training_indices)
        self.model.train()
        epoch_loss = 0
        batch_steps = 0
        # each minibatch is an array of sample indices;
        # arbitrary 100 minibatches of 80 samples each
        for minibatch in np.array_split(self.training_indices, 100):
            self.optimizer.zero_grad()
            output = self.model([self.data[i] for i in minibatch])
            target = self.dtype([self.target[i] for i in minibatch])
            loss = self.loss(output, target)
            epoch_loss += loss.item()
            loss.backward()
            self.optimizer.step()
            batch_steps += 1
        epoch_loss /= batch_steps
        return epoch_loss
    def test(self):
        self.model.eval()
        batch_outputs = []
        sample_losses = []
        # evaluate without building the autograd graph to save memory
        with torch.no_grad():
            for minibatch in self.test_batches:
                # minibatches for testing due to memory constraints
                out = self.model([self.data_test[i] for i in minibatch])
                target = self.dtype([self.target_test[i] for i in minibatch])
                batch_loss = self.batch_loss(out, target)
                sample_losses.extend(batch_loss.numpy().tolist())
                batch_outputs.extend(out.numpy().tolist())
        return np.mean(sample_losses)
    def fit(self, epochs=1000):
        self.model = Model(self.types)
        self.optimizer = torch.optim.Adam(self.model.parameters())
        train_losses = []
        test_losses = []
        try:
            for epoch in range(epochs):
                train_loss = self.train()
                train_losses.append(train_loss)
                test_loss = self.test()
                print('Train: {0:<10.1f} Test: {1:<10.1f}'.format(train_loss, test_loss))
                test_losses.append(test_loss)
        except (KeyboardInterrupt, SystemExit):
            print('\n')
        return train_losses, test_losses

types = ['A', 'B', 'C']
network = NetworkHandler(types)
network.placeholder_data()
train_losses, test_losses = network.fit(epochs=1000)
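Once training is done, I intend to pull out a single submodel and query it for a value and its gradient, roughly like this (a sketch, not part of the script above; note that the input must be created with requires_grad=True or data.grad stays None):

submodel = network.model.submodels['A']
submodel.eval()  # disable dropout so the prediction and gradient are deterministic
features = torch.rand(1, 100, requires_grad=True)
value, gradient = submodel.forward_with_derivatives(features)
print(value.item(), gradient.shape)  # gradient is d(value)/d(features), shape (1, 100)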