Converting ComplexFloat to Float at the Last Layer

Hello,

I’m developing an FFN for classification of complex-valued data. As such, I need the output of the NN to be real (so I can make the classification). Here, there are several Examples of Possible Softmax Functions. The simplest one would just require me to compute the magnitude before the softmax. However, I get the following error: `RuntimeError: mat1 and mat2 must have the same dtype`

Any ideas what I am getting wrong? (@ptrblck maybe? You have helped me before.)
Here is the full code:

class FeedforwardNetwork2(nn.Module):
    """
    Feed-forward classifier whose output is the magnitude of its last layer.

    The network is a stack of (Linear -> activation -> Dropout) blocks followed
    by one final Linear layer. ``forward`` returns the element-wise absolute
    value of that final layer as a float32 tensor, so the scores are real and
    non-negative even when the layers themselves produce complex values.
    """

    def __init__(
            self, n_classes, n_features, hidden_sizes, layers,
            activation_type, dropout, complex_input=False, **kwargs):
        """
        n_classes (int): number of output units
        n_features (int): number of features per input example
        hidden_sizes (int or list of int): width of every hidden layer when an
            int; one width per hidden layer when a list (length must be
            ``layers``)
        layers (int): number of hidden layers
        activation_type (str): "tanh" or "relu"
        dropout (float): dropout probability
        complex_input (bool): when True, a complex input is split into its
            real and imaginary parts (2 * n_features real features) before the
            first layer, so the real-valued layers can consume it. Defaults to
            False, which leaves the input untouched.
        **kwargs: accepted for call-site compatibility and ignored

        Raises KeyError for an unknown ``activation_type`` and ValueError when
        a list of hidden sizes does not have ``layers`` entries.
        """
        super().__init__()

        # Accept both documented forms: a scalar is repeated for every hidden
        # layer, an iterable is taken as the explicit per-layer widths.
        try:
            widths = list(hidden_sizes)
        except TypeError:
            widths = [hidden_sizes] * layers
        else:
            if len(widths) != layers:
                raise ValueError(
                    "hidden_sizes has {} entries but layers={}".format(
                        len(widths), layers))

        activations = {"tanh": nn.Tanh(), "relu": nn.ReLU()}
        activation = activations[activation_type]

        # Activation and dropout hold no parameters, so one instance of each
        # can safely be shared by every hidden block.
        dropout = nn.Dropout(dropout)

        self.complex_input = bool(complex_input)
        # Real and imaginary parts become separate input features.
        first_in = 2 * n_features if self.complex_input else n_features

        in_sizes = [first_in] + widths
        out_sizes = widths + [n_classes]

        hidden_blocks = [
            nn.Sequential(nn.Linear(in_size, out_size), activation, dropout)
            for in_size, out_size in zip(in_sizes[:-1], out_sizes[:-1])]
        output_layer = nn.Linear(in_sizes[-1], out_sizes[-1])

        self.feedforward = nn.Sequential(*hidden_blocks, output_layer)

    def forward(self, x, **kwargs):
        """
        x (batch_size x n_features): a batch of training examples

        Returns a float32 tensor (batch_size x n_classes) holding the absolute
        value of the last layer's output.

        Note: unless ``complex_input`` is set (or the module's parameters have
        been cast to a complex dtype), passing a complex ``x`` to the
        real-valued Linear layers raises
        "RuntimeError: mat1 and mat2 must have the same dtype".
        """
        if self.complex_input and torch.is_complex(x):
            # (..., n_features) complex -> (..., n_features, 2) real
            # -> (..., 2 * n_features) real
            x = torch.view_as_real(x).flatten(start_dim=-2)
        return abs(self.feedforward(x)).to(dtype=torch.float)

def train_batch(X, y, model, optimizer, criterion, **kwargs):
    """
    Perform one gradient step on a single batch and report its loss.

    X (n_examples x n_features): batch inputs
    y (n_examples): gold labels
    model: a PyTorch defined model
    optimizer: optimizer used in gradient step
    criterion: loss function

    The model predicts outputs for X, the criterion scores those predictions
    against the gold labels y, the loss is back-propagated to the model
    parameters, and the optimizer applies the update.

    Returns the batch loss as a plain Python number, detached from the
    computation graph.
    """
    # Drop gradients accumulated by the previous step.
    optimizer.zero_grad()

    batch_loss = criterion(model(X), y)
    batch_loss.backward()
    optimizer.step()

    loss_value = batch_loss.item()
    return loss_value


def predict(model, X):
    """
    Return the highest-scoring class index for every example.

    X (n_examples x n_features)

    The model maps X to scores (n_examples x n_classes); the result holds the
    argmax over the last dimension (n_examples).
    """
    return model(X).argmax(dim=-1)


def evaluate(model, X, y):
    """
    Compute classification accuracy of the model on (X, y).

    X (n_examples x n_features)
    y (n_examples): gold labels; a tensor with the same number of elements as
        the predictions but a different shape (e.g. n_examples x 1) is
        reshaped to match before comparison

    The model is switched to eval mode for the prediction and then returned to
    whichever mode it was in before the call. Predictions are computed without
    building an autograd graph.

    Returns the fraction of correct predictions as a float.
    """
    was_training = model.training
    model.eval()
    try:
        # Inference only: no gradients are needed for accuracy.
        with torch.no_grad():
            y_hat = predict(model, X)
    finally:
        # Restore the caller's mode instead of forcing train mode.
        model.train(was_training)

    # Comparing (N, 1) labels with (N,) predictions would broadcast to (N, N)
    # and over-count matches, so align the label shape first.
    if y.shape != y_hat.shape and y.numel() == y_hat.numel():
        y = y.reshape(y_hat.shape)

    n_correct = (y == y_hat).sum().item()
    n_possible = float(y.shape[0])
    return n_correct / n_possible


def countDistinct(arr):
    """
    Count the distinct values found in the first column of a 2-D array.

    arr: an object exposing ``tolist()`` that yields a list of rows (for
        example a 2-D NumPy array or tensor); only element 0 of each row is
        inspected

    Returns the number of distinct first-column values (0 for an empty array).
    Every row is considered, including the first one.
    """
    return len({row[0] for row in arr.tolist()})

def Scaler(X):
    """
    Standardize the columns of X: subtract the column mean and divide by the
    column standard deviation (both computed along dimension 0).

    X: tensor whose first dimension indexes the examples

    Returns a new tensor of the same shape as X. Columns whose standard
    deviation is exactly zero (constant columns) are only centered, so they
    come out as zeros instead of NaN.
    """
    mean_ = X.mean(0)
    std_ = torch.std(X, dim=0)

    # A constant column has std 0; dividing by it would give 0/0 = NaN.
    # Dividing by 1 instead leaves the centered value (0) untouched.
    safe_std = torch.where(std_ == 0, torch.ones_like(std_), std_)

    return (X - mean_) / safe_std

class ClassificationDataset(torch.utils.data.Dataset):
    """
    Dataset over the training split of a classification problem.

    Indexing and ``len`` operate on the training examples; the test split is
    stored alongside as ``test_X`` / ``test_y`` for later evaluation.
    """

    def __init__(self, data):
        """
        data: dict with "train" and "test" entries, each an
            (inputs, labels) pair of tensors

        The tensors are copied and detached from any existing graph. They are
        stored without ``requires_grad``: gradients are only needed for model
        parameters, and enabling them on integer label tensors raises a
        RuntimeError.
        """
        train_X, train_y = data["train"]
        test_X, test_y = data["test"]

        self.X = train_X.detach().clone()
        self.y = train_y.detach().clone()

        self.test_X = test_X.detach().clone()
        self.test_y = test_y.detach().clone()

    def __len__(self):
        """Number of training examples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the (input, label) pair of training example ``idx``."""
        return self.X[idx], self.y[idx]

def main():
    """
    Command-line entry point: load the data from .mat files, build a
    FeedforwardNetwork2 and train/evaluate it over stratified K-fold splits.

    Reads 'freq_range.mat', 'X.mat' and 'Y_class.mat' from the working
    directory and prints the training loss and validation accuracy.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): '-model' is parsed but opt.model is never read below;
    # a FeedforwardNetwork2 is always built.
    parser.add_argument('-model', default='ffn',
                        choices=['logistic_regression', 'ffn'],
                        help="Which model should the script run?")
    parser.add_argument('-epochs', default=20, type=int,
                        help="""Number of epochs to train for. You should not
                        need to change this value for your plots.""")
    parser.add_argument('-batch_size', default=1, type=int,
                        help="Size of training batch.")
    parser.add_argument('-learning_rate', type=float, default=0.01)
    parser.add_argument('-l2_decay', type=float, default=0)
    parser.add_argument('-hidden_sizes', type=int, default=200)
    parser.add_argument('-layers', type=int, default=1)
    parser.add_argument('-dropout', type=float, default=0.3)
    parser.add_argument('-activation',
                        choices=['tanh', 'relu'], default='tanh')
    parser.add_argument('-optimizer',
                        choices=['sgd', 'adam'], default='sgd')
    opt = parser.parse_args()

    # Presumably seeds the random number generators for reproducibility;
    # utils is defined outside this file.
    utils.configure_seed(seed=42)
    
    # NOTE(review): freq_range is loaded but not used anywhere below.
    freq_range = loadmat('freq_range.mat')['freq_range']
    X = np.array(loadmat('X.mat')['X'])
    Y = np.array(loadmat('Y_class.mat')['Y_class'])
    # Features are kept complex-valued (complex64); labels become float32.
    # NOTE(review): requires_grad=True on inputs and labels looks unnecessary,
    # since only model parameters are optimized — confirm.
    X = torch.tensor(X,dtype=torch.cfloat, requires_grad=True)
    Y = torch.tensor(Y,dtype=torch.float, requires_grad=True)
    
    n_classes = torch.unique(Y).shape[0]  # 2 classes - Litter or No Litter
    print(n_classes)
    n_feats = X.shape[1]
    # =============================================================================
    #X_pca = loadmat('X.mat')['X']
    #K_fold = [0, 2, 86, 41, 97]
    K_fold = 5
    skf = StratifiedKFold(n_splits=K_fold, random_state=1, shuffle=True)
    # NOTE(review): the return value of get_n_splits is discarded.
    skf.get_n_splits(X, Y)
    # NOTE(review): these two accumulators are never updated or read below.
    avg_mis_class_error_1HL = 0
    avg_mis_class_error_2HL = 0
    #Scale the data
    # Scaling uses statistics of the full data set, computed before the
    # train/test split.
    X_scaled = Scaler(X)

    # initialize the model
    # NOTE(review): the model and optimizer are created once, outside the fold
    # loop, so trained weights carry over from one fold to the next.
    # NOTE(review): the model's Linear layers are created with the default
    # dtype while X is complex; this is the combination reported to raise
    # "RuntimeError: mat1 and mat2 must have the same dtype".
    model = FeedforwardNetwork2(
            n_classes, n_feats,
            opt.hidden_sizes, opt.layers,
            opt.activation, opt.dropout)

    # get an optimizer
    optims = {"adam": torch.optim.Adam, "sgd": torch.optim.SGD}

    optim_cls = optims[opt.optimizer]
    
    optimizer = optim_cls(
        model.parameters(),
        lr=opt.learning_rate,
        weight_decay=opt.l2_decay)

    # get a loss criterion
    # NOTE(review): BCELoss expects inputs in [0, 1] with the same shape as
    # the target; the model returns unbounded magnitudes with n_classes
    # columns — confirm this is the intended loss.
    criterion = nn.BCELoss()
    
    # Model questions = https://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw
    # The split only needs NumPy arrays, hence detach().numpy().
    for train_index, test_index in skf.split(X.detach().numpy(), Y.detach().numpy()):
    #for k in K_fold:
        # Split into training and test data
        #X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=k)
        X_train, dev_X = X_scaled[train_index], X_scaled[test_index]
        y_train, dev_y = Y[train_index], Y[test_index]
        # The held-out fold is stored under "test" and also used for the
        # validation accuracy below.
        data = {"train": (X_train, y_train),
                "test": (dev_X, dev_y)}
        dataset=ClassificationDataset(data)
        train_dataloader = DataLoader(
            dataset, batch_size=opt.batch_size, shuffle=True)
        #dataset = utils.ClassificationDataset(data)
        #train_dataloader = DataLoader(
        #    dataset, batch_size=opt.batch_size, shuffle=True)
        # training loop
        epochs = torch.arange(1, opt.epochs + 1)
        train_mean_losses = []
        valid_accs = []
        # Batch losses accumulate over all epochs of the current fold.
        train_losses = []
        for ii in epochs:
            print('Training epoch {}'.format(ii))
            #for i in range(len(y_train)):
            #    loss = train_batch(
            #        X_train[i], y_train[i], model, optimizer, criterion)
            #    train_losses.append(loss)
            for X_batch, y_batch in train_dataloader:
                loss = train_batch(
                    X_batch, y_batch, model, optimizer, criterion)
                train_losses.append(loss)
    
        # NOTE(review): the statements below sit at fold level, so the mean
        # loss and validation accuracy are computed once per fold (after the
        # last epoch), not once per epoch — confirm the indentation is intended.
        mean_loss = torch.tensor(train_losses).mean().item()
        print('Training loss: %.4f' % (mean_loss))
    
        train_mean_losses.append(mean_loss)
        valid_accs.append(evaluate(model, dev_X, dev_y))
        print('Valid acc: %.4f\n' % (valid_accs[-1]))


# Run the experiment only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

I guess the error is raised if you try to mix complex and real dtypes as seen here:

# Minimal reproduction: a complex64 input of shape (1, 1)
x = torch.tensor([[1.]],dtype=torch.cfloat, requires_grad=True)

# nn.Linear is created without a dtype argument, i.e. with PyTorch's default
# dtype, which does not match the complex input
lin = nn.Linear(1, 1)
out = lin(x)
# RuntimeError: mat1 and mat2 must have the same dtype

# Casting the layer to complex64 makes its dtype match the input,
# and the same call then succeeds
lin.to(torch.cfloat)
out = lin(x)
print(out)
# tensor([[-0.0853+0.j]], grad_fn=<AddmmBackward0>)

I’m not sure what exactly your use case is, but assuming your model’s parameters need to be real, you might need to transform the inputs.