Help troubleshooting: learning on group-averaged data

I am working on a project where it is easy to obtain data averaged over a group of samples but difficult to obtain data on individual samples, and I would like to train neural networks that predict on individual samples. I would greatly appreciate help troubleshooting my PyTorch implementation.

Here is an outline for an equivalent problem:

  • 10,000 boxes each contain between 1 and 10 objects of several different types (A, B, C)
  • Each object has some value based on 100 features
  • Only the average value of the objects in an entire box can be obtained for learning, never the values of individual objects (see the sketch after this list)
  • The derivative of value w.r.t. features is also desired.
  • The goal is to train one value-predicting model per object type (e.g. network “A” is fit to predict the value of any A-type object)
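
To make the supervision signal concrete: the only quantity that can be compared against a measured label is the mean of the per-object predictions for a whole box. A minimal sketch of that relationship (the names here are illustrative, not taken from my code below):

    import torch

    # hypothetical per-type value networks, one per object type
    submodels = {"A": torch.nn.Linear(100, 1), "B": torch.nn.Linear(100, 1)}

    # one box: a few objects of each type, each described by 100 features
    box = {"A": torch.rand(3, 100), "B": torch.rand(2, 100)}

    # only this per-box mean can be trained against the measured box value
    per_object = torch.cat([submodels[t](feats) for t, feats in box.items()])
    predicted_box_value = per_object.mean()
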
  1. I am using one overarching network to backpropagate the box-level value through the object-level (sub)networks. The arrangement of (sub)networks changes with every sample, matching the contents of each box. Is this the correct approach?

  2. Each box’s data is stored in a dictionary with one key per object type; under each key is a list of “feature” tensors, one for each object of that type. I suspect that reading these lists box by box is inefficient. Is there a better approach to loading this data, given that the amount of information varies from sample to sample?
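
One alternative I have been wondering about (not implemented in the code below) is to flatten each minibatch so that every submodel sees all objects of its type in a single forward pass, and then average the per-object values back into their boxes with an index-based reduction. A rough sketch of what I mean, where box_means_by_type is a hypothetical helper and batch has the same dict-of-lists structure as my data:

    import torch

    def box_means_by_type(batch, submodels):
        """Run each submodel once per minibatch instead of once per object,
        then average the per-object values back into their boxes."""
        totals = torch.zeros(len(batch))
        counts = torch.zeros(len(batch))
        for type_, submodel in submodels.items():
            feats, box_ids = [], []
            for box_idx, box in enumerate(batch):
                for obj in box.get(type_, []):      # obj has shape (1, 100)
                    feats.append(obj)
                    box_ids.append(box_idx)
            if not feats:
                continue
            values = submodel(torch.cat(feats, dim=0)).squeeze(-1)
            idx = torch.tensor(box_ids)
            totals = totals.index_add(0, idx, values)
            counts = counts.index_add(0, idx, torch.ones_like(values))
        return totals / counts  # one predicted mean value per box

Would something along these lines be preferable to the nested loops in Model.forward below, or is there a more standard way to feed variable-sized samples (e.g. a DataLoader with a custom collate_fn)?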

I have attached my code below and would appreciate any tips on improving it, as well as alternative approaches to the problem.

import torch
import numpy as np


class Submodel(torch.nn.Module):
    def __init__(self, type_):
        """
        One submodel to handle each type of object.
        """
        super(Submodel, self).__init__()
        self.type_ = type_
        # ModuleList (rather than a plain Python list) registers the layers'
        # parameters with the module; it also avoids add_module() names
        # containing ".", which PyTorch rejects.
        self.layers = torch.nn.ModuleList([
            torch.nn.Linear(100, 15),
            torch.nn.Linear(15, 15),
            torch.nn.Softplus(),
            torch.nn.Dropout(p=0.20),
            torch.nn.Linear(15, 15),
            torch.nn.Softplus(),
            torch.nn.Dropout(p=0.20),
            torch.nn.Linear(15, 1),
        ])

    def forward_with_derivatives(self, data):
        """
        Used once the network is fully trained: returns the predicted value
        and its gradient with respect to the input features.
        """
        data.requires_grad_(True)  # otherwise data.grad is never populated
        output = self.forward(data)
        output.backward()
        return output, data.grad

    def forward(self, data):
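        """Plain forward pass: the predicted value for one object's features."""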
        h = data
        for layer in self.layers:
            h = layer(h)
        output = h
        return output


class Model(torch.nn.Module):
    """
    Only used for training, when only box-level data is available.
    """
    def __init__(self, types):
        super(Model, self).__init__()
        # ModuleDict keeps the per-type lookup while registering each
        # submodel's parameters with the parent model.
        self.submodels = torch.nn.ModuleDict()
        for type_ in types:
            self.submodels[type_] = Submodel(type_)

    def forward(self, batch_data):
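        """
        batch_data is a list of boxes; each box is a dict mapping an object
        type to a list of (1, 100) feature tensors.  Each object is scored by
        its type's submodel and the per-box mean is returned, so the output
        has one value per box in the batch.
        """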
        batch_output = []
        for data in batch_data:
            obj_values = []
            for type_, type_data in data.items():
                type_values = []
                for obj_features in type_data:
                    obj_value = self.submodels[type_](obj_features)
                    type_values.append(obj_value)
                obj_values.extend(type_values)
            box_value = torch.cat(obj_values, dim=0).mean(0)
            batch_output.append(box_value)
        return torch.cat(batch_output, dim=0)


class NetworkHandler:
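    """Owns the data, the train/test split, the composite model and the training loop."""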
    def __init__(self, types):
        self.types = types
        self.indices = np.arange(10000)  # number of samples
        self.training_indices = []
        self.training_set = []
        self.testing_set = []
        self.model = None

        self.loss = torch.nn.MSELoss()
        self.batch_loss = torch.nn.MSELoss(reduction='none')  # per-sample losses for test()
        self.dtype = torch.FloatTensor

    def placeholder_data(self):
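        """
        Build random stand-in data with the same structure as the real data:
        per box, a dict mapping object type to a list of (1, 100) feature
        tensors, plus one scalar target value per box, split 80-20.
        """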
        boxes = []
        for i in self.indices:
            box = {}
            for type_ in self.types:
                n_points = np.random.randint(1, 11)  # 1-10 objects of this type, matching the outline
                objs = []
                for j in np.arange(n_points):
                    feature = np.random.rand(1, 100)
                    # from_numpy gives float64; cast to the float32 dtype used for training
                    objs.append(torch.from_numpy(feature).type(self.dtype))
                box[type_] = objs
            boxes.append(box)
        self.data = boxes[:8000]
        self.data_test = boxes[-2000:]
        self.training_indices = self.indices[:8000]
        #  arbitrary 80-20 split

        box_values = np.random.rand(len(self.indices),)*1000
        self.target = box_values[:8000]
        self.target_test = box_values[-2000:]
        
        self.test_batches = np.array_split(np.arange(len(self.data_test)), 10)
        #  10 arbitrary minibatches for testing due to memory constraints

    def train(self):
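        """Run one epoch over shuffled minibatches and return the mean minibatch loss."""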
        np.random.shuffle(self.training_indices)
        self.model.train()
        epoch_loss = 0
        batch_steps = 0
        for minibatch in np.array_split(self.training_indices, 100):
            #  each minibatch is a list of sample indices
            #  arbitrary 100 minibatches of 100 samples each
            self.optimizer.zero_grad()
            output = self.model([self.data[i] for i in minibatch])
            batch_target = self.dtype([self.target[i] for i in minibatch])
            loss = self.loss(output, batch_target)
            batch_loss = loss.item()  # scalar minibatch loss for logging
            epoch_loss += batch_loss
            loss.backward()
            self.optimizer.step()
            batch_steps += 1
        epoch_loss /= batch_steps
        return epoch_loss

    def test(self):
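        """Evaluate on the held-out boxes in minibatches and return the mean per-sample loss."""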
        self.model.eval()
        batch_outputs = []
        sample_losses = []
        with torch.no_grad():  # no graph is needed at test time, which also saves memory
            for minibatch in self.test_batches:
                #  minibatches for testing due to memory constraints
                out = self.model([self.data_test[i] for i in minibatch])
                batch_target = self.dtype([self.target_test[i]
                                           for i in minibatch])
                batch_loss = self.batch_loss(out, batch_target)
                sample_losses.extend(batch_loss.numpy().tolist())
                batch_outputs.extend(out.numpy().tolist())
        return np.mean(sample_losses)

    def fit(self, epochs=1000):
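        """Build the model and optimizer, then alternate training and testing; returns the loss histories."""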
        self.model = Model(self.types)
        self.optimizer = torch.optim.Adam(self.model.parameters())
        train_losses = []
        test_losses = []
        try:
            for epoch in range(epochs):
                train_loss = self.train()
                train_losses.append(train_loss)
                test_loss = self.test()
                print('Train: {0:<10.1f}| Test: {1:<10.1f}'.format(train_loss, test_loss))
                test_losses.append(test_loss)
        except (KeyboardInterrupt, SystemExit):
            print('\n')
        return train_losses, test_losses

types = ['A', 'B', 'C']
network = NetworkHandler(types)
network.placeholder_data()
train_losses, test_losses = network.fit(epochs=1000)