RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 3, 5, 5], but got 1-dimensional input of size [128] instead

Your previous code already sampled the data, so you can reuse it:

import random

from torch.utils.data import Dataset

class Siamese(Dataset):
    def __init__(self, dataset):
        super().__init__()
        self.dataset = dataset

    def __getitem__(self, index):
        # Sample a pair from the same class roughly 50% of the time
        same_class = random.randint(0, 1)
        img_0, label_0 = self.dataset[index]
        if same_class:
            while True:
                # keep looping until an image of the same class is found
                index_1 = random.randint(0, self.__len__()-1)
                img_1, label_1 = self.dataset[index_1]

                if label_0 == label_1:
                    break
        else:
            while True:
                index_1 = random.randint(0, self.__len__()-1)
                img_1, label_1 = self.dataset[index_1]
                if label_0 != label_1:
                    break

        return (img_0, label_0), (img_1, label_1)

    def __len__(self):
        return len(self.dataset)
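
A quick usage sketch (the CIFAR10 base dataset and the batch size are just assumptions for illustration; any (image, label) dataset works):

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Assumed base dataset for illustration
base = datasets.CIFAR10(root="./data", train=True, download=True,
                        transform=transforms.ToTensor())
loader = DataLoader(Siamese(base), batch_size=128, shuffle=True)

(img_0, label_0), (img_1, label_1) = next(iter(loader))
print(img_0.shape, label_0.shape)  # torch.Size([128, 3, 32, 32]) torch.Size([128])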

Thank you so much @ptrblck, it worked. However, I am getting very low accuracy (around 3%) and it stays constant throughout 100 epochs. Also, the average loss I am getting is around 10000.
I guess I am making a mistake in the loss function definition:

import torch.nn as nn
import torch.nn.functional as F

class ContrastiveLoss(nn.Module):
    def __init__(self, margin):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin
        self.eps = 1e-9

    def forward(self, output1, output2, target0, target1, size_average=True):
        distances = (output2 - output1).pow(2).sum(1)  # squared distances
        # the two raw class labels are combined by multiplication here
        losses = 0.5 * ((target0.float() * target1.float()) * distances +
                        (1 + -1 * (target0 * target1).float()) *
                        F.relu(self.margin - (distances + self.eps).sqrt()).pow(2))
        return losses.mean() if size_average else losses.sum()
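
For comparison, a standard contrastive loss derives one binary same/different target from the pair of labels (this is just a reference sketch, not my current code; the class name is made up):

import torch.nn as nn
import torch.nn.functional as F

class BinaryTargetContrastiveLoss(nn.Module):
    """Sketch: 0.5 * (y * d^2 + (1 - y) * max(0, margin - d)^2) with y in {0, 1}."""

    def __init__(self, margin):
        super().__init__()
        self.margin = margin
        self.eps = 1e-9

    def forward(self, output1, output2, label_0, label_1, size_average=True):
        target = (label_0 == label_1).float()  # 1 = same class, 0 = different
        distances = (output2 - output1).pow(2).sum(1)  # squared distances
        losses = 0.5 * (target * distances +
                        (1 - target) *
                        F.relu(self.margin - (distances + self.eps).sqrt()).pow(2))
        return losses.mean() if size_average else losses.sum()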

Also, here is my network:

import torch.nn as nn

class EmbeddingNet(nn.Module):
    def __init__(self):
        super(EmbeddingNet, self).__init__()
        self.convnet = nn.Sequential(nn.Conv2d(3, 32, 5), nn.PReLU(),
                                     nn.MaxPool2d(2, stride=2),
                                     nn.Conv2d(32, 64, 5), nn.PReLU(),
                                     nn.MaxPool2d(2, stride=2))

        # 64 * 61 * 61 assumes 3 x 256 x 256 inputs
        self.fc = nn.Sequential(nn.Linear(64 * 61 * 61, 256),
                                nn.PReLU(),
                                nn.Linear(256, 256),
                                nn.PReLU(),
                                nn.Linear(256, 2)
                                )

    def forward(self, x):
        output = self.convnet(x)
        output = output.view(output.size(0), -1)  # flatten to (batch, features)
        output = self.fc(output)
        return output

    def get_embedding(self, x):
        return self.forward(x)

class SiameseNet(nn.Module):
    def __init__(self, embedding_net):
        super(SiameseNet, self).__init__()
        self.embedding_net = embedding_net

    def forward(self, x1, x2):
        output1 = self.embedding_net(x1)
        output2 = self.embedding_net(x2)
        return output1, output2

    def get_embedding(self, x):
        return self.embedding_net(x)
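
A quick shape check of the two networks (a sketch; the 256x256 input size is inferred from the hard-coded 64 * 61 * 61):

import torch

# 256 -> conv5 -> 252 -> pool -> 126 -> conv5 -> 122 -> pool -> 61,
# so nn.Linear(64 * 61 * 61, 256) only matches 3 x 256 x 256 inputs
model = SiameseNet(EmbeddingNet())
x1, x2 = torch.randn(2, 3, 256, 256), torch.randn(2, 3, 256, 256)
out1, out2 = model(x1, x2)
print(out1.shape, out2.shape)  # torch.Size([2, 2]) torch.Size([2, 2])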

Should I add more layers to improve accuracy, or change the network entirely? What would be your suggestion?

I would suggest checking the loss calculation next to see whether negative values are expected or not.
Loss functions are usually defined on the range [0, +Inf], so your losses are unusual. Negative losses can of course work (you could just subtract a constant from e.g. nn.CrossEntropyLoss and it would still train), but the overall training also seems to be quite unstable.
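
A minimal sketch of that point (the values here are arbitrary): shifting a loss by a constant leaves its gradients, and therefore the parameter updates, unchanged:

import torch
import torch.nn as nn

torch.manual_seed(0)
logits = torch.randn(4, 10, requires_grad=True)
targets = torch.randint(0, 10, (4,))

loss = nn.CrossEntropyLoss()(logits, targets)
shifted = loss - 100.0  # negative value, same gradients

grad_loss = torch.autograd.grad(loss, logits, retain_graph=True)[0]
grad_shifted = torch.autograd.grad(shifted, logits)[0]
print(torch.allclose(grad_loss, grad_shifted))  # True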