I am working on a project where it is easy to obtain data averaged over a group of samples but difficult to obtain data on individual samples, and I would like to train neural networks to predict on individual samples. I would appreciate help troubleshooting my PyTorch implementation.
Here is an outline for an equivalent problem:
- 10,000 boxes each contain between 1 and 10 objects of several different types (A, B, C)
- Each object has some value based on 100 features
- Only the average value of the objects in an entire box is available for learning, not the values of individual objects
- The derivative of value w.r.t. features is also desired.
- The goal is to train one value-predicting model per object type (e.g., network “A” is fit to predict the value of any A-type object); see the sketch below
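In other words, the only supervised signal per box is the mean of the unknown per-object values. Roughly, in pseudocode (`value_net`, `obj.type`, and `obj.features` are placeholder names of mine):

# Per-box supervision (sketch): the target is the mean of per-object
# values, each produced by the model for that object's type.
box_value = sum(value_net[obj.type](obj.features) for obj in box) / len(box)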
I am using one overarching network to backpropagate the box-level value through the object-level (sub)networks. The arrangement of (sub)networks changes with every sample, matching the contents of each box. Is this the correct approach?
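One batched alternative I have been considering (an untested sketch; `batched_forward` and the helper names are my own) groups all objects of a given type across the minibatch so that each submodel runs once per type, then scatters the per-object values back to their boxes:

import torch

def batched_forward(submodels, batch_data):
    """Sketch: evaluate each submodel once per type over every object in
    the minibatch, then average per box via index_add. Assumes each
    feature tensor has shape (1, 100) and every box is non-empty."""
    n_boxes = len(batch_data)
    box_sums = torch.zeros(n_boxes)
    box_counts = torch.zeros(n_boxes)
    for type_, submodel in submodels.items():
        features, box_ids = [], []
        for b, box in enumerate(batch_data):
            for obj in box.get(type_, []):
                features.append(obj)
                box_ids.append(b)
        if not features:
            continue
        values = submodel(torch.cat(features, dim=0)).squeeze(1)  # (n_obj,)
        idx = torch.tensor(box_ids)
        box_sums = box_sums.index_add(0, idx, values)
        box_counts = box_counts.index_add(0, idx, torch.ones(len(box_ids)))
    return box_sums / box_counts  # per-box mean object value

This could replace the nested loops in Model.forward below, e.g. as batched_forward(self.submodels, batch_data), but I have not verified it is faster.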
Each box’s data is stored in a dictionary with a key for each type of object; under each key is a list of “feature” tensors, one per object of that type. I suspect that reading these lists of per-object tensors is inefficient. Is there a better approach to loading this data, given that the amount of information varies from sample to sample?
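For concreteness, here is how one box looks, plus the kind of DataLoader wrapper I have been considering (a sketch; `BoxDataset` and `collate_boxes` are names I made up):

import torch
from torch.utils.data import Dataset, DataLoader

# One box: a dict mapping object type to a list of (1, 100) feature tensors.
box = {'A': [torch.rand(1, 100), torch.rand(1, 100)],  # two A-type objects
       'B': [torch.rand(1, 100)]}                      # one B-type object

class BoxDataset(Dataset):
    """Wraps the per-box dicts and the box-level targets."""
    def __init__(self, boxes, targets):
        self.boxes, self.targets = boxes, targets
    def __len__(self):
        return len(self.boxes)
    def __getitem__(self, i):
        return self.boxes[i], self.targets[i]

def collate_boxes(samples):
    # Boxes hold varying numbers of tensors, so keep them in a plain list
    # rather than trying to stack them into one rectangular tensor.
    boxes = [b for b, _ in samples]
    targets = torch.tensor([t for _, t in samples], dtype=torch.float32)
    return boxes, targets

# loader = DataLoader(BoxDataset(boxes_list, box_values), batch_size=100,
#                     shuffle=True, collate_fn=collate_boxes)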
I have attached my code below and would appreciate any tips for improving it, as well as alternative approaches to the problem.
import torch
import numpy as np
class Submodel(torch.nn.Module):
    """
    One submodel to handle each type of object: 100 features -> scalar value.
    """
    def __init__(self, type_):
        super().__init__()
        self.type_ = type_
        # nn.Sequential registers the layers automatically; the original
        # per-layer add_module("{}.{}".format(type_, i), ...) raises an
        # error because module names may not contain dots
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(100, 15),
            torch.nn.Linear(15, 15),
            torch.nn.Softplus(),
            torch.nn.Dropout(p=0.20),
            torch.nn.Linear(15, 15),
            torch.nn.Softplus(),
            torch.nn.Dropout(p=0.20),
            torch.nn.Linear(15, 1),
        )
    def forward_with_derivatives(self, data):
        """
        Used once the network is fully trained, on a single object.
        Call eval() first so dropout is disabled.
        """
        # .grad is only populated on leaf tensors that require grad
        data = data.detach().requires_grad_(True)
        output = self.forward(data)
        output.backward()
        return output, data.grad
    def forward(self, data):
        return self.layers(data)
class Model(torch.nn.Module):
    """
    Only used for training, when only box-level data is available.
    """
    def __init__(self, types):
        super().__init__()
        # ModuleDict registers each submodel so its parameters are trained
        self.submodels = torch.nn.ModuleDict(
            {type_: Submodel(type_) for type_ in types})
    def forward(self, batch_data):
        batch_output = []
        for data in batch_data:  # one dict per box
            obj_values = []
            for type_, type_data in data.items():
                for obj_features in type_data:
                    obj_values.append(self.submodels[type_](obj_features))
            # the box-level value is the mean over its objects' values
            box_value = torch.cat(obj_values, dim=0).mean(0)
            batch_output.append(box_value)
        return torch.cat(batch_output, dim=0)
class NetworkHandler:
    def __init__(self, types):
        self.types = types
        self.indices = np.arange(10000)  # number of samples
        self.training_indices = []
        self.model = None
        self.loss = torch.nn.MSELoss()
        # reduction='none' keeps one loss per sample for test-time reporting
        # (the old reduce=False keyword is deprecated)
        self.batch_loss = torch.nn.MSELoss(reduction='none')
    def placeholder_data(self):
        boxes = []
        for i in self.indices:
            box = {}
            for type_ in self.types:
                n_points = np.random.randint(1, 10)  # 1-9 objects per type
                objs = []
                for j in range(n_points):
                    feature = np.random.rand(1, 100)
                    # plain float32 tensors; inputs need no gradient here
                    objs.append(torch.tensor(feature, dtype=torch.float32))
                box[type_] = objs
            boxes.append(box)
        # arbitrary 80-20 split
        self.data = boxes[:8000]
        self.data_test = boxes[-2000:]
        self.training_indices = self.indices[:8000]
        box_values = np.random.rand(len(self.indices)) * 1000
        self.target = box_values[:8000]
        self.target_test = box_values[-2000:]
        # 10 arbitrary minibatches for testing due to memory constraints
        self.test_batches = np.array_split(np.arange(len(self.data_test)), 10)
    def train(self):
        np.random.shuffle(self.training_indices)
        self.model.train()
        epoch_loss = 0.0
        batch_steps = 0
        # arbitrary 100 minibatches of 80 samples each;
        # each minibatch is an array of sample indices
        for minibatch in np.array_split(self.training_indices, 100):
            self.optimizer.zero_grad()
            output = self.model([self.data[i] for i in minibatch])
            target = torch.tensor([self.target[i] for i in minibatch],
                                  dtype=torch.float32)
            loss = self.loss(output, target)
            epoch_loss += loss.item()
            loss.backward()
            self.optimizer.step()
            batch_steps += 1
        return epoch_loss / batch_steps
    def test(self):
        self.model.eval()
        sample_losses = []
        with torch.no_grad():  # no gradients needed at test time
            for minibatch in self.test_batches:
                # minibatches for testing due to memory constraints
                out = self.model([self.data_test[i] for i in minibatch])
                target = torch.tensor(
                    [self.target_test[i] for i in minibatch],
                    dtype=torch.float32)
                sample_losses.extend(self.batch_loss(out, target).tolist())
        return np.mean(sample_losses)
def fit(self, epochs=1000):
self.model = Model(self.types)
self.optimizer = torch.optim.Adam(self.model.parameters())
train_losses = []
test_losses = []
try:
for epoch in range(epochs):
train_loss = self.train()
train_losses.append(train_loss)
test_loss = self.test()
print('Train: {0:<10.1f}| Test: {1:<10.1f}'.format(train_loss, test_loss))
test_losses.append(test_loss)
except (KeyboardInterrupt, SystemExit):
print('\n')
return train_losses, test_losses
types = ['A', 'B', 'C']
network = NetworkHandler(types)
network.placeholder_data()
train_losses, test_losses = network.fit(epochs=1000)
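Once training finishes, I intend to query per-object values and feature gradients like this (a sketch, using the forward_with_derivatives method defined above):

# After training: ask one submodel directly for a single object's
# predicted value and its derivative w.r.t. the 100 input features.
network.model.eval()  # disable dropout before taking gradients
features = torch.rand(1, 100)
value, grad = network.model.submodels['A'].forward_with_derivatives(features)
print(value.item(), grad.shape)  # grad has shape (1, 100)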