Hello,
I’m developing an FFN for classification of complex-valued data. As such, need the output of the NN to be real (so I can make the classification). Here, there are several Examples of Possible Softmax Functions. The simpler one would just require me to compute the magnitude before the softmax. However, I get the following error: RuntimeError: mat1 and mat2 must have the same dtype
Anyideas what am I getting wrong? ( @ptrblck maybe? you had helped me before)
Here is the full code:
class FeedforwardNetwork2(nn.Module):
def __init__(
self, n_classes, n_features, hidden_sizes, layers,
activation_type, dropout, **kwargs):
"""
n_classes (int)
n_features (int)
hidden_sizes (list) Note: can also be a int
layers (int)
activation_type (str)
dropout (float): dropout probability
"""
super().__init__()
activations = {"tanh": nn.Tanh(), "relu": nn.ReLU()}
activation = activations[activation_type]
dropout = nn.Dropout(dropout)
in_sizes = [n_features] + [hidden_sizes] * layers
out_sizes = [hidden_sizes] * layers + [n_classes]
self.feedforward = nn.Sequential(*[
nn.Sequential(
nn.Linear(in_size, out_size),
activation,
dropout)
for in_size, out_size in zip(in_sizes[:-1], out_sizes[:-1])],
nn.Linear(in_sizes[-1], out_sizes[-1]))
def forward(self, x, **kwargs):
"""
x (batch_size x n_features): a batch of training examples
"""
return abs(self.feedforward(x)).to(dtype=torch.float)
def train_batch(X, y, model, optimizer, criterion, **kwargs):
"""
X (n_examples x n_features)
y (n_examples): gold labels
model: a PyTorch defined model
optimizer: optimizer used in gradient step
criterion: loss function
To train a batch, the model needs to predict outputs for X, computes the
loss between these predictions and the "gold" labels y using the criterion,
and compute the gradient of the loss with respect to the model parameters.
Returns the loss to get the loss as a numerical value
that is not part of the computation graph.
"""
optimizer.zero_grad()
yhat = model(X)
loss = criterion(yhat, y)
loss.backward()
optimizer.step()
return loss.item()
def predict(model, X):
"""X (n_examples x n_features)"""
scores = model(X) # (n_examples x n_classes)
predicted_labels = scores.argmax(dim=-1) # (n_examples)
return predicted_labels
def evaluate(model, X, y):
"""
X (n_examples x n_features)
y (n_examples): gold labels
"""
model.eval()
y_hat = predict(model, X)
n_correct = (y == y_hat).sum().item()
n_possible = float(y.shape[0])
model.train()
return n_correct / n_possible
def countDistinct(arr):
res = 0
set_number = set()
n = len(arr)
arr = arr.tolist()
# Pick all elements one by one
for i in range(1, n):
#Check if unique number is in set
if arr[i][0] in set_number:
continue
else:
set_number.add(arr[i][0])
res += 1
return res
def Scaler(X):
# Normalize the data, i.e., center and scale
mean_ = X.mean(0)
std_ = torch.std(X, axis=0)
X_ = (X - mean_) / std_
return X_
class ClassificationDataset(torch.utils.data.Dataset):
def __init__(self, data):
"""
data: the dict returned by utils.load_classification_data
"""
train_X, train_y = data["train"]
#dev_X, dev_y = data["dev"]
test_X, test_y = data["test"]
self.X = train_X.clone().detach().requires_grad_(True)
self.y = train_y.clone().detach().requires_grad_(True)#torch.tensor(train_y, dtype=torch.long)
#self.dev_X = torch.tensor(dev_X, dtype=torch.float32)
#self.dev_y = torch.tensor(dev_y, dtype=torch.long)
self.test_X = test_X.clone().detach().requires_grad_(True)#torch.tensor(test_X, dtype=torch.float32)
self.test_y = test_y.clone().detach().requires_grad_(True)#torch.tensor(test_y, dtype=torch.long)
def __len__(self):
return len(self.X)
def __getitem__(self, idx):
return self.X[idx], self.y[idx]
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-model', default='ffn',
choices=['logistic_regression', 'ffn'],
help="Which model should the script run?")
parser.add_argument('-epochs', default=20, type=int,
help="""Number of epochs to train for. You should not
need to change this value for your plots.""")
parser.add_argument('-batch_size', default=1, type=int,
help="Size of training batch.")
parser.add_argument('-learning_rate', type=float, default=0.01)
parser.add_argument('-l2_decay', type=float, default=0)
parser.add_argument('-hidden_sizes', type=int, default=200)
parser.add_argument('-layers', type=int, default=1)
parser.add_argument('-dropout', type=float, default=0.3)
parser.add_argument('-activation',
choices=['tanh', 'relu'], default='tanh')
parser.add_argument('-optimizer',
choices=['sgd', 'adam'], default='sgd')
opt = parser.parse_args()
utils.configure_seed(seed=42)
freq_range = loadmat('freq_range.mat')['freq_range']
X = np.array(loadmat('X.mat')['X'])
Y = np.array(loadmat('Y_class.mat')['Y_class'])
X = torch.tensor(X,dtype=torch.cfloat, requires_grad=True)
Y = torch.tensor(Y,dtype=torch.float, requires_grad=True)
n_classes = torch.unique(Y).shape[0] # 2 classes - Litter or No Litter
print(n_classes)
n_feats = X.shape[1]
# =============================================================================
#X_pca = loadmat('X.mat')['X']
#K_fold = [0, 2, 86, 41, 97]
K_fold = 5
skf = StratifiedKFold(n_splits=K_fold, random_state=1, shuffle=True)
skf.get_n_splits(X, Y)
avg_mis_class_error_1HL = 0
avg_mis_class_error_2HL = 0
#Scale the data
X_scaled = Scaler(X)
# initialize the model
model = FeedforwardNetwork2(
n_classes, n_feats,
opt.hidden_sizes, opt.layers,
opt.activation, opt.dropout)
# get an optimizer
optims = {"adam": torch.optim.Adam, "sgd": torch.optim.SGD}
optim_cls = optims[opt.optimizer]
optimizer = optim_cls(
model.parameters(),
lr=opt.learning_rate,
weight_decay=opt.l2_decay)
# get a loss criterion
criterion = nn.BCELoss()
# Model questions = https://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw
for train_index, test_index in skf.split(X.detach().numpy(), Y.detach().numpy()):
#for k in K_fold:
# Split into training and test data
#X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=k)
X_train, dev_X = X_scaled[train_index], X_scaled[test_index]
y_train, dev_y = Y[train_index], Y[test_index]
data = {"train": (X_train, y_train),
"test": (dev_X, dev_y)}
dataset=ClassificationDataset(data)
train_dataloader = DataLoader(
dataset, batch_size=opt.batch_size, shuffle=True)
#dataset = utils.ClassificationDataset(data)
#train_dataloader = DataLoader(
# dataset, batch_size=opt.batch_size, shuffle=True)
# training loop
epochs = torch.arange(1, opt.epochs + 1)
train_mean_losses = []
valid_accs = []
train_losses = []
for ii in epochs:
print('Training epoch {}'.format(ii))
#for i in range(len(y_train)):
# loss = train_batch(
# X_train[i], y_train[i], model, optimizer, criterion)
# train_losses.append(loss)
for X_batch, y_batch in train_dataloader:
loss = train_batch(
X_batch, y_batch, model, optimizer, criterion)
train_losses.append(loss)
mean_loss = torch.tensor(train_losses).mean().item()
print('Training loss: %.4f' % (mean_loss))
train_mean_losses.append(mean_loss)
valid_accs.append(evaluate(model, dev_X, dev_y))
print('Valid acc: %.4f\n' % (valid_accs[-1]))
if __name__ == '__main__':
main()