I am using Pytorch to create a CNN for regression on synthetic data. My synthetic data are all positive.

It is a univariate regression problem (one output variable). The input and output were generated synthetically. The output is a gaussian distribution with mean = 1.0, and standard deviation = 0.1.

The input into the CNN is a 2-D tensor with 1 input channel. The 2-D tensor is 10x100. The top row of every sample is 1000, and the bottom row of every sample is 500. For the middle 8 rows, the value decreases monotonically as you move down each column, and the value increases monotonically as you move across each row.

I am using MSE as the loss function and stochastic gradient descent for the optimization method. Are these appropriate for my data? Should I do anything differently given that my data is positive and the monotonicity of the data?

This is my code:

# Hyperparameters.
num_epochs = 1000
num_classes = 1  # univariate regression: a single output value per sample
learning_rate = 0.0000001

# Load the synthetic data (numpy arrays).
dataset, labels = synthetic.get_data()

# Convert numpy arrays to pytorch tensors (float32 to match default model dtype).
dataset = torch.tensor(dataset.astype(np.float32))
labels = torch.tensor(labels.astype(np.float32))

# Permute columns so 1st dimension is num_samples, 2nd is num_TCs,
# 3rd is num_time_steps.
dataset = dataset.permute(2, 0, 1)
dataset = dataset.unsqueeze(1)  # add a dimension for channels

## dataset = dataset.double()
## labels = labels.double()

# Divide dataset into training and test.
# Note: we do not need to randomly extract from the dataset since there is
# already randomness due to how the data was generated.
num_samples, num_input_channels, num_time_steps, num_TCs = dataset.size()
num_training_samples = round(0.75 * num_samples)
training_dataset = dataset[:num_training_samples, :, :, :]
training_labels = labels[:num_training_samples]
test_dataset = dataset[num_training_samples:, :, :, :]
test_labels = labels[num_training_samples:]

# Function for computing image width or height after convolving or pooling
# is defined below (calc_size).

def calc_size(orig_size, filter_size, padding, stride, layer):
    """Return the output width/height of a conv or pooling layer.

    Uses the standard formula (W - F + 2P) / S + 1.

    Args:
        orig_size: input width or height (int).
        filter_size: kernel width or height.
        padding: padding applied on each side.
        stride: stride of the filter.
        layer: layer number, used only in the error message.

    Exits the program if the result is not an integer, since that means the
    filter/stride/padding combination does not tile the input exactly.
    """
    length = (orig_size - filter_size + 2 * padding) / stride + 1
    if not length.is_integer():
        print("Filter size in layer {} resulted in non-integer dimension".format(layer))
        print("This shouldn't happen for now.")
        sys.exit()
    return int(length)

class ConvNet(nn.Module):
    """CNN for univariate regression.

    Two conv+ReLU layers followed by dropout and two fully connected layers
    producing a single scalar output per sample. Reads the module-level
    globals num_input_channels, num_TCs and num_time_steps for its input
    dimensions.
    """

    def __init__(self):
        super().__init__()

        # Layer-1 conv hyperparameters.
        filter_size_width_1 = 5
        filter_size_height_1 = filter_size_width_1
        padding_1 = 0
        stride_1 = 1
        # Layer-2 conv hyperparameters.
        filter_size_width_2 = 5
        filter_size_height_2 = filter_size_width_2
        padding_2 = 0
        stride_2 = 1

        self.layer1 = nn.Sequential(
            nn.Conv2d(num_input_channels, 32,
                      kernel_size=filter_size_width_1,
                      stride=stride_1,
                      padding=padding_1),  # input_channels = 1 b/c grayscale
            nn.ReLU())
        #   nn.MaxPool2d(kernel_size=2, stride=2))

        # Spatial dimensions after the 1st conv layer.
        # NOTE(review): after the permute/unsqueeze the tensor is unpacked as
        # (num_samples, num_input_channels, num_time_steps, num_TCs), so height
        # should come from num_time_steps and width from num_TCs -- confirm the
        # two calls below are not swapped relative to that layout.
        W = calc_size(num_TCs, filter_size_width_1, padding_1, stride_1, 1)
        H = calc_size(num_time_steps, filter_size_height_1, padding_1, stride_1, 1)
        print("w type", type(W))
        print("Width: {}, height: {} after first convolution".format(W, H))

        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64,
                      kernel_size=filter_size_width_2,
                      stride=stride_2,
                      padding=padding_2),
            nn.ReLU())
        #   nn.MaxPool2d(kernel_size=2, stride=2))

        # Spatial dimensions after the 2nd conv layer.
        W = calc_size(W, filter_size_width_2, padding_2, stride_2, 2)
        H = calc_size(H, filter_size_height_2, padding_2, stride_2, 2)
        print("Width: {}, height: {} after second convolution".format(W, H))

        self.drop_out = nn.Dropout()
        # Two fully connected layers; the final layer emits one value because
        # this is univariate regression (not 10-class classification).
        self.fc1 = nn.Linear(W * H * 64, 1000)
        self.fc2 = nn.Linear(1000, 1)

    def forward(self, data_to_propagate):
        # Overrides nn.Module.forward (analogous to a virtual function in C++).
        out = self.layer1(data_to_propagate)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)  # flatten to (batch, features) for the FC layers
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

# Create the ConvNet instance.
model = ConvNet()

# Loss and optimizer.
# MSELoss is plain mean squared error -- appropriate for regression
# (it does NOT include softmax/cross-entropy; those are for classification).
cost_func = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training stage (full-batch gradient descent on the training set).
loss_list = []
mse_list = []
for epoch in range(num_epochs):
    # Forward pass; one batch containing every training sample.
    outputs = model(training_dataset)
    print(outputs)

    # outputs has shape (N, 1) while training_labels has shape (N,).
    # Squeeze so MSELoss compares element-wise; without this the two tensors
    # broadcast to (N, N) and the loss is silently computed over the wrong
    # pairs, which wrecks convergence.
    loss = cost_func(outputs.squeeze(1), training_labels)
    # print("loss :", loss)
    loss_list.append(loss.item())

    # Backpropagation and optimization.
    optimizer.zero_grad()
    loss.backward()   # calculates gradients in back propagation
    optimizer.step()  # after computing gradients, perform optimization

    # Track accuracy.
    # .detach() is the supported way to get a graph-free view (replaces .data).
    predicted = outputs.detach()
    predicted = predicted.squeeze(1)  # change it from 2D 1-column array to 1D array
    # mse_err = torch.sum((predicted.float() - training_labels) ** 2)  # output of sum() is a tensor
    # mse_list.append(mse_err)

    print("Epoch: {} | MSE: {}".format(epoch, loss))

I am getting terrible results. My predicted labels also end up negative, which shouldn’t happen. The MSE decreases, however, the convergence is very slow, and the predicted labels appear to be fluctuating. Any advice on how to remedy this?