I am using Pytorch to create a CNN for regression on synthetic data. My synthetic data are all positive.

It is a univariate regression problem (one output variable). The input and output were generated synthetically. The output is a gaussian distribution with mean = 1.0, and standard deviation = 0.1.

The input into the CNN is a 2-D tensor with 1 input channel. The 2-D tensor is 10x100. The top row of every sample is 1000, and the bottom row of every sample is 500. For the middle 8 rows, the value decreases monotonically as you move down each column, and the value increases monotonically as you move across each row.

I am using MSE as the loss function and stochastic gradient descent for the optimization method. Are these appropriate for my data? Should I do anything differently given that my data is positive and the monotonicity of the data?

This is my code:

# Hyperparameters.
num_epochs = 1000
num_classes = 1  # univariate regression: a single output value per sample
learning_rate = 0.0000001

# Load the synthetic data (numpy arrays).
dataset, labels = synthetic.get_data()

# Convert numpy arrays to pytorch tensors (float32 to match default model dtype).
dataset = torch.tensor(dataset.astype(np.float32))
labels = torch.tensor(labels.astype(np.float32))

# Permute columns so 1st dimension is num_samples, 2nd is num_TCs,
# 3rd is num_time_steps.
dataset = dataset.permute(2, 0, 1)
dataset = dataset.unsqueeze(1)  # add a dimension for channels

## dataset = dataset.double()
## labels = labels.double()

# Divide dataset into training and test.
# Note: we do not need to randomly extract from the dataset since there is
# already randomness due to how the data was generated.
num_samples, num_input_channels, num_time_steps, num_TCs = dataset.size()
num_training_samples = round(0.75 * num_samples)
training_dataset = dataset[:num_training_samples, :, :, :]
training_labels = labels[:num_training_samples]
test_dataset = dataset[num_training_samples:, :, :, :]
test_labels = labels[num_training_samples:]

# Function for computing image width or height after convolving or pooling
# is defined below (calc_size).

def calc_size(orig_size, filter_size, padding, stride, layer):
    """Return the output width/height of a conv or pooling layer.

    Uses the standard formula (W - F + 2P) / S + 1.

    Args:
        orig_size: input width or height (int).
        filter_size: kernel width or height.
        padding: padding applied on each side.
        stride: stride of the filter.
        layer: layer number, used only in the error message.

    Exits the program if the result is not an integer, since that means the
    filter/stride/padding combination does not tile the input exactly.
    """
    length = (orig_size - filter_size + 2 * padding) / stride + 1
    if not length.is_integer():
        print("Filter size in layer {} resulted in non-integer dimension".format(layer))
        print("This shouldn't happen for now.")
        sys.exit()
    return int(length)

class ConvNet(nn.Module):
    """CNN for univariate regression.

    Two conv+ReLU layers followed by dropout and two fully connected layers
    producing a single scalar output per sample. Reads the module-level
    globals num_input_channels, num_TCs and num_time_steps for its input
    dimensions.
    """

    def __init__(self):
        super().__init__()

        # Layer-1 conv hyperparameters.
        filter_size_width_1 = 5
        filter_size_height_1 = filter_size_width_1
        padding_1 = 0
        stride_1 = 1
        # Layer-2 conv hyperparameters.
        filter_size_width_2 = 5
        filter_size_height_2 = filter_size_width_2
        padding_2 = 0
        stride_2 = 1

        self.layer1 = nn.Sequential(
            nn.Conv2d(num_input_channels, 32,
                      kernel_size=filter_size_width_1,
                      stride=stride_1,
                      padding=padding_1),  # input_channels = 1 b/c grayscale
            nn.ReLU())
        #   nn.MaxPool2d(kernel_size=2, stride=2))

        # Spatial dimensions after the 1st conv layer.
        # NOTE(review): after the permute/unsqueeze the tensor is unpacked as
        # (num_samples, num_input_channels, num_time_steps, num_TCs), so height
        # should come from num_time_steps and width from num_TCs -- confirm the
        # two calls below are not swapped relative to that layout.
        W = calc_size(num_TCs, filter_size_width_1, padding_1, stride_1, 1)
        H = calc_size(num_time_steps, filter_size_height_1, padding_1, stride_1, 1)
        print("w type", type(W))
        print("Width: {}, height: {} after first convolution".format(W, H))

        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64,
                      kernel_size=filter_size_width_2,
                      stride=stride_2,
                      padding=padding_2),
            nn.ReLU())
        #   nn.MaxPool2d(kernel_size=2, stride=2))

        # Spatial dimensions after the 2nd conv layer.
        W = calc_size(W, filter_size_width_2, padding_2, stride_2, 2)
        H = calc_size(H, filter_size_height_2, padding_2, stride_2, 2)
        print("Width: {}, height: {} after second convolution".format(W, H))

        self.drop_out = nn.Dropout()
        # Two fully connected layers; the final layer emits one value because
        # this is univariate regression (not 10-class classification).
        self.fc1 = nn.Linear(W * H * 64, 1000)
        self.fc2 = nn.Linear(1000, 1)

    def forward(self, data_to_propagate):
        # Overrides nn.Module.forward (analogous to a virtual function in C++).
        out = self.layer1(data_to_propagate)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)  # flatten to (batch, features) for the FC layers
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

# Create the ConvNet instance.
model = ConvNet()

# Loss and optimizer.
# MSELoss is plain mean squared error -- appropriate for regression
# (it does NOT include softmax/cross-entropy; those are for classification).
cost_func = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training stage (full-batch gradient descent on the training set).
loss_list = []
mse_list = []
for epoch in range(num_epochs):
    # Forward pass; one batch containing every training sample.
    outputs = model(training_dataset)
    print(outputs)

    # outputs has shape (N, 1) while training_labels has shape (N,).
    # Squeeze so MSELoss compares element-wise; without this the two tensors
    # broadcast to (N, N) and the loss is silently computed over the wrong
    # pairs, which wrecks convergence.
    loss = cost_func(outputs.squeeze(1), training_labels)
    # print("loss :", loss)
    loss_list.append(loss.item())

    # Backpropagation and optimization.
    optimizer.zero_grad()
    loss.backward()   # calculates gradients in back propagation
    optimizer.step()  # after computing gradients, perform optimization

    # Track accuracy.
    # .detach() is the supported way to get a graph-free view (replaces .data).
    predicted = outputs.detach()
    predicted = predicted.squeeze(1)  # change it from 2D 1-column array to 1D array
    # mse_err = torch.sum((predicted.float() - training_labels) ** 2)  # output of sum() is a tensor
    # mse_list.append(mse_err)

    print("Epoch: {} | MSE: {}".format(epoch, loss))

I am getting terrible results. My predicted labels also end up negative, which shouldn’t happen. The MSE decreases, however, the convergence is very slow, and the predicted labels appear to be fluctuating. Any advice on how to remedy this?