Help troubleshooting: learning on group-averaged data

I am working on a project where it is easy to obtain averaged data over a group of samples but difficult to obtain data on individual samples, and would like to train neural networks to predict on individual samples. I would greatly appreciate assistance in troubleshooting my implementation in PyTorch for machine learning.

Here is an outline for an equivalent problem:

  • 10,000 boxes each contain between 1 and 10 objects of several different types (A, B, C)
  • Each object has some value based on 100 features
  • Only the average value of objects in one entire box can be obtained for learning, not the values of individual objects
  • The derivative of value w.r.t. features is also desired.
  • The goal is to train one value-predicting model for each type of object (e.g., network “A” is fit to predict the value of any A-type object)
  1. I am using one overarching network to backpropagate the box-level value through object-level (sub)networks. The arrangement of (sub)networks changes with every sample, corresponding to the contents of each box. Is this the correct approach?

  2. Each box’s data is in a dictionary, with a key for each type of object. A list of “feature” tensors, one for each object of that type, is stored under each key. I imagine that reading the lists in each dictionary may be inefficient. Is there a better approach to loading this data, given that the amount of information varies for each sample?

I have attached my code below, and would appreciate any tips for improving the code as well as alternative approaches to the problem.

import torch
import numpy as np

class Submodel(torch.nn.Module):
    """One submodel to handle each type of object.

    Maps a ``(n_objects, 100)`` feature tensor to a ``(n_objects, 1)`` tensor
    of predicted object values.
    """

    def __init__(self, type_):
        """Build the layer stack and register it under names based on *type_*.

        Args:
            type_: Label of the object type this submodel handles; used only
                to name the registered layers.
        """
        super(Submodel, self).__init__()
        # NOTE(review): there is no activation between the layers, so the
        # stack composes to a single affine map of the input.
        self.layers = [torch.nn.Linear(100, 15),
                       torch.nn.Linear(15, 15),
                       torch.nn.Linear(15, 15),
                       torch.nn.Linear(15, 1)]
        for i, layer in enumerate(self.layers):
            # Current PyTorch rejects module names containing ".", so the
            # type label and layer index are joined with "_" instead.
            self.add_module("{}_{}".format(type_, i), layer)

    def forward_with_derivatives(self, data):
        """Return predictions and their derivative w.r.t. the input features.

        Used once the network is fully-trained.

        Args:
            data: Feature tensor with one row per object.

        Returns:
            Tuple ``(output, gradient)``; ``gradient`` has the same shape as
            ``data`` and row ``i`` holds d(output[i]) / d(data[i]).
        """
        if not data.requires_grad:
            # Track the input without mutating the caller's tensor.
            data = data.detach().requires_grad_(True)
        output = self.forward(data)
        # Rows are processed independently, so differentiating the sum of
        # the outputs yields each row's own gradient.
        (gradient,) = torch.autograd.grad(output.sum(), data)
        return output, gradient

    def forward(self, data):
        """Apply the layers in order and return the predicted values."""
        h = data
        for layer in self.layers:
            h = layer(h)
        return h

class Model(torch.nn.Module):
    """Predict box-level values by averaging per-object submodel outputs.

    Only used for training, when only box-level data is available.
    """

    def __init__(self, types):
        """Create and register one ``Submodel`` per object type.

        Args:
            types: Iterable of object-type labels.
        """
        super(Model, self).__init__()
        self.submodels = {}

        for type_ in types:
            submodel = Submodel(type_)

            self.submodels[type_] = submodel
            # Registering the submodel exposes its parameters to optimizers.
            self.add_module("{}_module".format(type_), submodel)

    def forward(self, batch_data):
        """Return the mean predicted object value of every box in the batch.

        Args:
            batch_data: Sequence of boxes. Each box is a dict mapping a type
                label to a list of feature tensors, one per object
                (assumed to be shaped ``(1, n_features)`` each -- TODO
                confirm against the data loader).

        Returns:
            1-D tensor with one averaged value per box.

        Raises:
            ValueError: If a box contains no objects at all.
        """
        batch_output = []
        for data in batch_data:
            obj_values = []
            for type_, type_data in data.items():
                if len(type_data) == 0:
                    continue
                # One submodel call per type instead of one per object.
                features = torch.cat(list(type_data), dim=0)
                obj_values.append(self.submodels[type_](features))
            if not obj_values:
                raise ValueError("cannot average the values of an empty box")
            box_value = torch.cat(obj_values, dim=0).mean(0)
            batch_output.append(box_value)
        return torch.cat(batch_output, dim=0)

class NetworkHandler:
    """Generate placeholder box data and train/evaluate a ``Model`` on it."""

    def __init__(self, types, n_samples=10000):
        """Set up bookkeeping, loss functions and the tensor type.

        Args:
            types: List of object-type labels.
            n_samples: Number of boxes generated by ``placeholder_data``.
        """
        self.types = types
        self.indices = np.arange(n_samples)  # number of samples
        self.training_indices = []
        self.training_set = []
        self.testing_set = []
        self.model = None
        self.optimizer = None

        # Filled in by placeholder_data().
        self.data_train = None
        self.data_test = None
        self.target_train = None
        self.target_test = None
        self.test_batches = []

        self.loss = torch.nn.MSELoss()
        # Per-sample losses; ``reduction='none'`` replaces the deprecated
        # ``reduce=False``.
        self.batch_loss = torch.nn.MSELoss(reduction='none')
        self.dtype = torch.FloatTensor

    def placeholder_data(self):
        """Fill the handler with random boxes and random box-level targets."""
        boxes = []
        for _ in self.indices:
            box = {}
            for type_ in self.types:
                # randint's upper bound is exclusive: 1..9 objects per type.
                n_points = np.random.randint(1, 10)
                objs = []
                for _ in range(n_points):
                    feature = np.random.rand(1, 100)
                    # Cast from float64 so inputs match float32 layers.
                    objs.append(torch.from_numpy(feature).type(self.dtype))
                box[type_] = objs
            boxes.append(box)

        #  arbitrary 80-20 split
        n_train = (len(boxes) * 4) // 5
        self.data_train = boxes[:n_train]
        self.data_test = boxes[n_train:]
        self.training_indices = self.indices[:n_train]

        box_values = np.random.rand(len(self.indices),)*1000
        self.target_train = box_values[:n_train]
        self.target_test = box_values[n_train:]
        #  10 arbitrary minibatches for testing due to memory constraints
        self.test_batches = np.array_split(np.arange(len(self.data_test)), 10)

    def train(self):
        """Run one optimisation pass over the training set.

        Returns:
            Mean minibatch loss for the epoch (NaN if there were no batches).
        """
        epoch_loss = 0.0
        batch_steps = 0
        #  each minibatch is an array of sample indices
        #  arbitrary 100 minibatches
        for minibatch in np.array_split(self.training_indices, 100):
            if len(minibatch) == 0:
                # array_split yields empty chunks for small datasets.
                continue
            output = self.model([self.data_train[i] for i in minibatch])
            output = output.reshape(-1)
            batch_target = torch.from_numpy(self.target_train[minibatch])
            target = batch_target.type(self.dtype)
            loss = self.loss(output, target)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            epoch_loss += loss.item()
            batch_steps += 1
        if batch_steps == 0:
            return float('nan')
        return epoch_loss / batch_steps

    def test(self):
        """Return the mean per-sample loss over the test set."""
        sample_losses = []
        with torch.no_grad():
            for minibatch in self.test_batches:
                #  minibatches for testing due to memory constraints
                if len(minibatch) == 0:
                    continue
                out = self.model([self.data_test[i] for i in minibatch])
                out = out.reshape(-1)
                batch_target = torch.from_numpy(self.target_test[minibatch])
                batch_loss = self.batch_loss(out, batch_target.type(self.dtype))
                sample_losses.extend(batch_loss.tolist())
        if not sample_losses:
            return float('nan')
        return np.mean(sample_losses)

    def fit(self, epochs=1000):
        """Train a fresh model and return the per-epoch loss histories.

        Args:
            epochs: Maximum number of epochs to run.

        Returns:
            Tuple ``(train_losses, test_losses)`` of per-epoch losses.
        """
        if self.data_train is None:
            # No data has been loaded yet; fall back to the random data.
            self.placeholder_data()
        self.model = Model(self.types)
        self.optimizer = torch.optim.Adam(self.model.parameters())
        train_losses = []
        test_losses = []
        try:
            for _ in range(epochs):
                train_loss = self.train()
                test_loss = self.test()
                train_losses.append(train_loss)
                test_losses.append(test_loss)
                print('Train: {0:<10.1f}| Test: {1:<10.1f}'.format(train_loss, test_loss))
        except (KeyboardInterrupt, SystemExit):
            # Deliberate: interrupting training still returns the losses
            # gathered so far.
            pass
        return train_losses, test_losses

# Train on randomly generated placeholder boxes for three object types.
types = ['A', 'B', 'C']
network = NetworkHandler(types)
network.placeholder_data()
train_losses, test_losses = network.fit()