Looking for suggestions on creating a regression model using CNN

I am using PyTorch to create a CNN for regression on synthetic data. My synthetic data are all positive.

It is a univariate regression problem (one output variable). Both the input and the output were generated synthetically. The output is drawn from a Gaussian distribution with mean = 1.0 and standard deviation = 0.1.

The input into the CNN is a 2-D tensor with 1 input channel. The 2-D tensor is 10x100. The top row of every sample is 1000, and the bottom row of every sample is 500. For the middle 8 rows, the value decreases monotonically as you move down each column, and the value increases monotonically as you move across each row.

I am using MSE as the loss function and stochastic gradient descent for the optimization method. Are these appropriate for my data? Should I do anything differently given that my data is positive and the monotonicity of the data?

This is my code:

# Hyper-parameters for the training run.
num_epochs = 1000
num_classes = 1
learning_rate = 0.0000001

dataset, labels = synthetic.get_data()

# Convert the numpy arrays to PyTorch tensors.
# The data is converted straight to float64: going through float32 first and
# then widening to double would only throw away precision without changing
# the final dtype.
dataset = torch.tensor(dataset, dtype=torch.float64)
labels = torch.tensor(labels, dtype=torch.float64)

# Permute axes so the 1st dimension is num_samples, the 2nd is num_TCs and the
# 3rd is num_time_steps.
dataset = dataset.permute(2, 0, 1)
# Add a channel dimension: (num_samples, 1, ., .)
dataset = dataset.unsqueeze(1)

# Divide the dataset into training and test sets.
# Note: we do not need to sample randomly from the dataset since there is
# already randomness due to how the data was generated.

# NOTE(review): the permute comment above lists num_TCs before num_time_steps,
# but this unpacking names the last two axes in the opposite order -- confirm
# which axis is which before relying on either name individually.
num_samples, num_input_channels, num_time_steps, num_TCs = dataset.size()
num_training_samples = round(0.75 * num_samples)

# First 75% of the samples are used for training, the remainder for testing.
training_dataset = dataset[:num_training_samples]
training_labels = labels[:num_training_samples]
test_dataset = dataset[num_training_samples:]
test_labels = labels[num_training_samples:]

#function for computing image width or height after convolving or pooling
def calc_size(orig_size, filter_size, padding, stride, layer):
    """Return the output length along one axis after a convolution or pooling.

    Uses the standard formula
    ``(orig_size - filter_size + 2 * padding) / stride + 1``.

    Args:
        orig_size: Input length along the axis.
        filter_size: Kernel length along the same axis.
        padding: Padding added to each side of the axis.
        stride: Step between successive kernel positions.
        layer: Layer number, used only in the error message.

    Returns:
        The output length as an ``int``.

    Raises:
        SystemExit: If the formula yields a non-integer length. The exit
            status is 1 so that the failure is visible to the calling shell.
    """
    length = (orig_size - filter_size + 2 * padding) / stride + 1

    if not length.is_integer():
        print("Filter size in layer {} resulted in non-integer dimension".format(layer))
        print("This shouldn’t happen for now.")
        # Same exception type as sys.exit(), but with a failing exit status.
        raise SystemExit(1)

    return int(length)

class ConvNet(nn.Module):
    """Small CNN for single-output regression.

    Architecture: two Conv2d+ReLU stages, flatten, dropout, then two fully
    connected layers with a ReLU in between, ending in one output value per
    sample.

    The constructor reads the module-level names ``num_input_channels``,
    ``num_TCs`` and ``num_time_steps`` and uses ``calc_size`` to work out the
    flattened feature size feeding the first fully connected layer.
    """

    def __init__(self):
        super().__init__()

        # Hyper-parameters of the first convolution (square kernel).
        filter_size_width_1 = 5
        filter_size_height_1 = filter_size_width_1
        padding_1 = 0
        stride_1 = 1

        # Hyper-parameters of the second convolution (square kernel).
        filter_size_width_2 = 5
        filter_size_height_2 = filter_size_width_2
        padding_2 = 0
        stride_2 = 1

        self.layer1 = nn.Sequential(
            # A single input channel is expected (like a grayscale image).
            nn.Conv2d(
                num_input_channels,
                32,
                kernel_size=filter_size_width_1,
                stride=stride_1,
                padding=padding_1,
            ),
            nn.ReLU(),
        )

        # Track the spatial dimensions after the first conv layer so the
        # fully connected layer can be sized correctly.
        W = calc_size(num_TCs, filter_size_width_1, padding_1, stride_1, 1)
        H = calc_size(num_time_steps, filter_size_height_1, padding_1, stride_1, 1)

        print("w type", type(W))

        print("Width: {}, height: {} after first convolution".format(W, H))

        self.layer2 = nn.Sequential(
            # padding_2 is passed explicitly so the layer always agrees with
            # the size computed by calc_size below.
            nn.Conv2d(
                32,
                64,
                kernel_size=filter_size_width_2,
                stride=stride_2,
                padding=padding_2,
            ),
            nn.ReLU(),
        )

        # Spatial dimensions after the second conv layer.
        W = calc_size(W, filter_size_width_2, padding_2, stride_2, 2)
        H = calc_size(H, filter_size_height_2, padding_2, stride_2, 2)

        print("Width: {}, height: {} after second convolution".format(W, H))

        self.drop_out = nn.Dropout()

        # Two fully connected layers. Without a non-linearity between them
        # they would collapse into a single linear map, so a ReLU is applied
        # after fc1 (it has no parameters, so the state dict is unaffected).
        self.fc1 = nn.Linear(W * H * 64, 1000)
        self.fc_activation = nn.ReLU()
        self.fc2 = nn.Linear(1000, 1)  # single regression output

    def forward(self, data_to_propagate):
        """Run a batch through the network.

        Args:
            data_to_propagate: Input batch passed to the first Conv2d layer.

        Returns:
            Tensor with one row per sample and a single column holding the
            predicted value.
        """
        out = self.layer1(data_to_propagate)
        out = self.layer2(out)
        # Flatten everything except the batch dimension for the FC layers.
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc_activation(self.fc1(out))
        out = self.fc2(out)

        return out

# Create the ConvNet instance.
# Parameters are cast to the dtype of the training data: nn.Module parameters
# are created in PyTorch's default dtype, and a mismatch with the input dtype
# raises a runtime error in the first convolution.
model = ConvNet().to(training_dataset.dtype)

# Loss and optimizer.
cost_func = nn.MSELoss()  # mean squared error, the usual loss for regression
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training stage.
loss_list = []
mse_list = []

for epoch in range(num_epochs):
    # Forward pass over the whole training set (full-batch gradient descent).
    outputs = model(training_dataset)
    print(outputs)

    # The network returns one column per sample; reshape it to the shape of
    # the labels so MSELoss compares element-wise instead of broadcasting the
    # two tensors against each other.
    loss = cost_func(outputs.reshape(training_labels.shape), training_labels)
    loss_list.append(loss.item())

    # Backpropagation and optimization.
    optimizer.zero_grad()
    loss.backward()  # compute gradients
    optimizer.step()  # apply the parameter update

    # Keep a gradient-free 1-D copy of the predictions for inspection.
    predicted = outputs.detach().squeeze(1)

    print("Epoch: {} | MSE: {}".format(epoch, loss.item()))

I am getting terrible results. My predicted labels also end up negative, which shouldn’t happen. The MSE decreases, however, the convergence is very slow, and the predicted labels appear to be fluctuating. Any advice on how to remedy this?

The same thing happened to me. Did you figure out the problem?