I just wrote my first PyTorch code and have some questions.

```
#running on a Tesla V100-SXM2-32GB
# Prefer the GPU, but fall back to the CPU so the script still runs on
# machines without a usable CUDA device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
```

Train, validation and test matrices are read in as sparse matrices.

```
# Load the training split: a pickled table of observations and the matching
# feature matrix stored as an .npz file. The shape of each is printed as soon
# as it has been read.
print('Sparse matrices:')

train_pd = pd.read_pickle(
    DIR_INST + '/{}_train.pkl'.format(INST)
)
print(
    ' train_pd: {:,} x {:,}'.format(*train_pd.shape)
)

train_csc = load_npz(
    DIR_INST + '/{}_train{}.npz'.format(INST, SFX)
)
print(
    ' train_csc: {:,} x {:,}'.format(*train_csc.shape)
)
#same for validation and test
#...
```

Each matrix is made dense before being converted to a PyTorch tensor.

```
class MyDataset(torch.utils.data.Dataset):
    """Dataset pairing dense feature rows with their ``is_case`` labels.

    Features and labels are both stored as float32 tensors created on the
    module-level ``device``.
    """

    def __init__(self, X_csc, obs_pd):
        """Build the tensors.

        X_csc  -- matrix exposing ``toarray()``; densified before conversion.
        obs_pd -- table whose ``'is_case'`` column supplies the labels.
        """
        dense_features = X_csc.toarray()
        self.X = torch.tensor(dense_features, dtype=torch.float32, device=device)
        case_flags = obs_pd['is_case'].values
        self.y = torch.tensor(case_flags, dtype=torch.float32, device=device)

    def __len__(self):
        """Return the number of rows in the feature tensor."""
        n_rows = self.X.shape[0]
        return n_rows

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features = self.X[index, :]
        target = self.y[index]
        return features, target
# Wrap the training and validation splits; only the training split is
# iterated in shuffled mini-batches of 511 rows.
data_train = MyDataset(X_csc=train_csc, obs_pd=train_pd)
data_valid = MyDataset(X_csc=valid_csc, obs_pd=valid_pd)
train_loader = torch.utils.data.DataLoader(
    data_train,
    shuffle=True,
    batch_size=511,
)
```

A flexible MLP class, so that the number and size of the layers and the dropout probabilities can be varied.

```
class MLP(torch.nn.Module):
    """Multi-layer perceptron with a single sigmoid output unit.

    Every hidden layer is a ``Linear -> ReLU -> Dropout`` triple; the stack
    ends with ``Linear(last_width, 1) -> Sigmoid``. All modules are kept, in
    order, in ``self.linears``.
    """

    def __init__(self, inputsz, hidden, drop):
        """Build the layer stack.

        inputsz -- number of input features.
        hidden  -- sequence (list or tuple) of hidden-layer widths.
        drop    -- sequence of dropout probabilities, one per hidden layer;
                   an IndexError is raised if it is shorter than ``hidden``.
        """
        super().__init__()
        # list() lets callers pass a tuple as well as a list for ``hidden``.
        widths = [inputsz] + list(hidden)
        self.linears = torch.nn.ModuleList()
        for n, (fan_in, fan_out) in enumerate(zip(widths, widths[1:])):
            self.linears.append(torch.nn.Linear(fan_in, fan_out))
            self.linears.append(torch.nn.ReLU())
            # Indexed (not zipped) so a too-short ``drop`` fails loudly
            # instead of silently dropping hidden layers.
            self.linears.append(torch.nn.Dropout(drop[n]))
        self.linears.append(torch.nn.Linear(widths[-1], 1))
        self.linears.append(torch.nn.Sigmoid())

    def forward(self, x):
        """Apply every module in ``self.linears`` in order and return the result."""
        for layer in self.linears:
            x = layer(x)
        return x
#example model: one hidden layer of 63 units with dropout probability 0.5
model = MLP(
    inputsz=train_csc.shape[1],
    hidden=[63],
    drop=[.5],
)
print(model)
# Move the parameters to the selected device.
model = model.to(device)
```

Model:

```
MLP(
(linears): ModuleList(
(0): Linear(in_features=1911, out_features=63, bias=True)
(1): ReLU()
(2): Dropout(p=0.5)
(3): Linear(in_features=63, out_features=1, bias=True)
(4): Sigmoid()
)
)
```

```
# Binary cross-entropy loss, optimised with Adam at a learning rate of 1e-4.
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)
```

The f1 score for training and validation is computed at the end of every epoch. `f1_prev` is the f1 score computed at prevalence. Example: suppose there are 3 cases among 10 observations, so the prevalence is 0.3. The three observations with the highest scores are then labeled as cases, after which tp, fp, etc. are computed, and finally f1.

```
def f1_prev(label, score):
    """Return the F1 score obtained by thresholding ``score`` at prevalence.

    The ``ncase`` highest-scoring observations (``ncase`` being the number of
    positives in ``label``) are predicted as cases and the rest as controls;
    F1 is then computed from the resulting tp / fp / fn counts.

    label -- array-like of 0/1 ground-truth labels.
    score -- array-like of scores, same length as ``label``.

    Returns a float; 0.0 when the denominator is zero (e.g. ``label`` holds
    no positives), rather than NaN.
    """
    label = np.asarray(label)
    score = np.asarray(score)
    # Plain Python ints avoid mixing uint64 with signed integers, which
    # NumPy promotes to float64.
    n_case = int(label.sum())
    n_cont = label.shape[0] - n_case
    # Stable ascending sort: the last ``n_case`` entries are predicted cases.
    ranked = label[np.argsort(score, kind='mergesort')]
    tp = ranked[n_cont:].sum()
    fp = ranked[:n_cont].sum()
    fn = n_case - tp
    denom = 2. * tp + fn + fp
    if denom == 0:
        return 0.0
    return float(2. * tp / denom)
for epoch in range(200):
    #training
    # Switch back to training mode each epoch so the Dropout layers are active
    # for the optimisation pass (evaluation below turns them off).
    model.train()
    for X_mb, y_mb in train_loader:
        yhat_mb = model(X_mb)
        # The model outputs shape (batch, 1); take column 0 to match y_mb.
        loss = criterion(yhat_mb[:, 0], y_mb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    #evaluation
    # Scores must be computed in eval mode: otherwise Dropout keeps randomly
    # zeroing activations and the reported f1 is pessimistic and noisy.
    model.eval()
    # no_grad skips building the autograd graph for the full-dataset forward
    # passes, which saves memory and time.
    with torch.no_grad():
        yhat_train = model(data_train.X).cpu().numpy().ravel()
        yhat_valid = model(data_valid.X).cpu().numpy().ravel()
    # loss is that of the last mini-batch of the epoch.
    print(epoch, loss.item(), f1_prev(train_pd['is_case'].values, yhat_train),
          f1_prev(valid_pd['is_case'].values, yhat_valid))
```

Questions:

- As I noted earlier, this is the first time I am using PyTorch. I have the same model running in Keras, and that results in an f1_valid score which is almost 2% higher. Here is the Keras network, trained with
`('learning_rate': 0.0001, 'batch_size': 511)`

```
Layer (type) Output Shape Param #
inp (InputLayer) (None, 1911) 0
_________________________________________________________________
drop0 (Dropout) (None, 1911) 0
_________________________________________________________________
hide1 (Dense) (None, 63) 120456
_________________________________________________________________
drop1 (Dropout) (None, 63) 0
_________________________________________________________________
out (Dense) (None, 1) 64
```

```
# Keras counterpart: Adam at the same learning rate, binary cross-entropy loss.
model.compile(
    loss='binary_crossentropy',
    optimizer=keras.optimizers.Adam(lr=.0001),
)
```

Any idea what the reason may be?

- How can I speed up the training above?

For example, can the sparse matrices be used directly, without converting them to dense with `toarray()`?
- Can the f1 score, computed as shown above, be pushed to the GPU?

Or is there any other way?

Thanks.