# Is converting categories to one-hot vectors faster in NumPy than inside a DataLoader?

Hi,
Is it possible that a manual loop is faster than a `DataLoader`?

Earlier (manual loop with NumPy one-hot encoding):

def one_hot_vector(x_raw, n_uniq):
    """One-hot encode a 2-D array of category codes.

    Args:
        x_raw: NumPy array of shape ``(rows, cols)`` holding category codes.
        n_uniq: Number of categories, i.e. the length of each one-hot vector.

    Returns:
        An ``np.int8`` array of shape ``(rows, cols, n_uniq)`` in which
        ``out[r, c, k] == 1`` exactly when ``x_raw[r, c] == k``. Codes that
        are not an integer in ``0 .. n_uniq - 1`` yield an all-zero vector.
    """
    input_len = x_raw.shape[0]
    input_col_len = x_raw.shape[1]
    x = np.zeros((input_len * input_col_len, n_uniq), dtype=np.int8)

    flat = x_raw.reshape(-1)
    codes = flat.astype(np.intp)
    # Keep only entries that are exact, in-range category indices so that
    # anything else is left as an all-zero row instead of raising or wrapping.
    valid = (codes == flat) & (codes >= 0) & (codes < n_uniq)

    # One vectorised assignment instead of scanning the whole array once per
    # category (n_uniq full passes over the data).
    x[np.flatnonzero(valid), codes[valid]] = 1
    return x.reshape(input_len, input_col_len, n_uniq)

# Manual training loop: one-hot encode the inputs in chunks of 100000 rows,
# then feed each chunk to the model in mini-batches of `batch_size`.
for epoch in range(num_epochs):
    epoch_time = datetime.now()
    for i in range(0, x_train.shape[0], 100000):
        # Start of this chunk; reported as "Loop Cost" in the print below.
        strt_time = datetime.now()
        one_hot_x_train = one_hot_vector(x_train[i:i + 100000], 2983)
        one_hot_x_train = torch.from_numpy(one_hot_x_train)
        y_train_ = torch.from_numpy(y_train[i:i + 100000].astype(np.int32))
        for j in range(0, one_hot_x_train.shape[0], batch_size):
            inputs = one_hot_x_train[j:j + batch_size].to(device).float()
            targets = y_train_[j:j + batch_size].squeeze().to(device).long()

            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Backward and optimize. Gradients accumulate across backward()
            # calls, so they must be cleared before every step.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f"Epoch : {epoch}/{num_epochs} Train % : {(i+100000)/(x_train.shape[0])} Loss : {loss.item()} Loop Cost : {datetime.now()-strt_time} ")
    print(f"Epoch time : {datetime.now()-epoch_time}")
``````

``````import torch
from torch.utils import data
import numpy as np
import torch.nn.functional as F

class Dataset_1(data.Dataset):
    """Map-style PyTorch dataset that one-hot encodes each sample on access."""

    def __init__(self, list_IDs, labels, n_uniq):
        """Store the raw samples, their labels and the one-hot width.

        Args:
            list_IDs: Indexable collection of samples; each sample holds
                integer category codes.
            labels: Indexable collection of labels aligned with ``list_IDs``.
            n_uniq: Number of categories (length of each one-hot vector).
        """
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_uniq = n_uniq

    def __len__(self):
        """Return the total number of samples."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Return ``(one_hot_sample, label)`` for the sample at ``index``."""
        # as_tensor converts to int64 in a single step and shares memory with
        # the source when no conversion is needed, whereas torch.tensor(...)
        # always copies the row first and then converts it again.
        codes = torch.as_tensor(self.list_IDs[index], dtype=torch.int64)
        X = F.one_hot(codes, num_classes=self.n_uniq)
        y = self.labels[index]

        return X, y

# DataLoader settings.
# NOTE(review): 50 worker processes producing batches of only 50 samples may
# make per-batch overhead dominate; worth tuning when comparing against the
# manual loop.
params = {'batch_size': 50,
          'shuffle': True,
          'num_workers': 50}

training_set = Dataset_1(x_train, y_train, 2983)
# The loop below iterates over this loader; it has to be built from the
# dataset before training starts.
training_generator = data.DataLoader(training_set, **params)

validation_set = Dataset_1(x_test, y_test, 2983)

for epoch in range(num_epochs):
    epoch_time = datetime.now()
    # Training
    print('Training Start')
    counter = 0
    for local_batch, local_labels in training_generator:
        counter += 1
        # Transfer to GPU
        local_batch = local_batch.to(device).float()
        local_labels = local_labels.to(device).long()

        outputs = model(local_batch)
        loss = criterion(outputs, local_labels)

        # Gradients accumulate across backward() calls, so clear them before
        # each optimisation step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if counter % 10000 == 0:
            print(f"Counter : {counter} || Loss : {loss.item()}")
    print(f"Epoch time : {datetime.now()-epoch_time} || Loss : {loss.item()}")
``````

I have a huge dataset (200M samples). I do not have exact timings, but the time to run 2 epochs almost doubled with the data loader.

Also, the model's loss doesn't seem to improve much: it stays between 4 and 5 if I use the Adam optimizer, and between 2 and 3 if I use the SGD optimizer. What could I try in order to improve the accuracy of LSTM models?

Here are the hyper-parameters and the model that I am using:

``````
# Hyper-parameters
# NOTE(review): sequence_length is not referenced in the code shown here;
# presumably the number of time steps per sample - confirm.
sequence_length = 10
# Feature size of each LSTM input step; matches the one-hot width (2983)
# used when the inputs are encoded.
input_size = 2983
# Size of the LSTM hidden state.
hidden_size = 128
# Number of stacked LSTM layers.
num_layers = 4
# Number of output classes produced by the final linear layer.
num_classes = 100
# Number of passes over the training data.
num_epochs = 2
# NOTE(review): the optimizer construction is not shown; 0.1 is far above
# Adam's default of 1e-3 and may be why the loss plateaus - confirm.
learning_rate = 0.1
``````
``````# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    """Many-to-one LSTM classifier.

    A stacked LSTM reads the whole input sequence and a linear layer maps
    the output of the last time step to class scores.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        """Build the network.

        Args:
            input_size: Number of features per time step.
            hidden_size: Size of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
            num_classes: Number of output classes.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape ``(batch_size, num_classes)``.

        Args:
            x: Input of shape ``(batch_size, seq_length, input_size)``.
        """
        # No initial state is passed, so nn.LSTM starts from zero hidden and
        # cell states on the same device and dtype as `x`. Random initial
        # states would add fresh noise to every forward pass and make the
        # output non-deterministic.
        out, _ = self.lstm(x)  # out: (batch_size, seq_length, hidden_size)

        # Decode the hidden state of the last time step
        return self.fc(out[:, -1, :])

# Place the model on the same `device` that the training loops move the
# batches to, instead of unconditionally requiring CUDA.
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
``````

The manual loop might be faster, since you are just slicing the tensor, while your `Dataset` copies the data.
Try using `torch.from_numpy` in your `__getitem__` and compare the results again.