Hi,
Here is a snippet of the code I have implemented.
class BiDAF(nn.Module):
def __init__(self,char_embeddings,glove_model):
super(BiDAF,self).__init__()
self.context_highway = Context_Highway()
self.question_highway = Question_Highway()
self.char_embedding_dict = char_embeddings
self.glove_model = glove_model
self.tanh = nn.Tanh()
self.train_w = torch.randn(420,requires_grad = True)
self.w_p1 = torch.randn(700,requires_grad = True)
self.w_p2 = torch.randn(700,requires_grad = True)
self.bias = torch.randn(1,requires_grad = True)
self.conv1 = nn.Conv2d(1,5,kernel_size = (50,3))
self.conv2 = nn.Conv2d(1,5,kernel_size = (50,4))
self.conv3 = nn.Conv2d(1,3,kernel_size = (50,2))
self.conv4 = nn.Conv2d(1,7,kernel_size = (50,3))
self.context_bilstm = nn.LSTM(70, 70,bidirectional = True)
self.question_bilstm = nn.LSTM(70, 70,bidirectional = True)
self.bilstm_m1 = nn.LSTM(560, 70,bidirectional = True)
self.bilstm_m2 = nn.LSTM(140, 70,bidirectional = True)
..................................
bidaf = BiDAF(char_embeddings, glove_model)
bidaf = bidaf.to(device)
criterion = nn.CrossEntropyLoss()
lr = 0.01
optimizer = torch.optim.Adam(bidaf.parameters(),lr = lr)
max_data = len(dataset['data'])
epochs = 2
iterations_per_epochs = 10000
sum_loss = 0
for epoch in range(epochs):
for iteration in range(1,iterations_per_epochs+1):
index = random.randint(0, max_data)
context = dataset['data'][index]['context']
question = dataset['data'][index]['question']
start_index = torch.tensor([dataset['data'][index]['start_index']], dtype=torch.int64)
end_index = torch.tensor([dataset['data'][index]['end_index']], dtype=torch.int64)
start_index = start_index.to(device)
end_index = end_index.to(device)
pred = bidaf.forward(context, question)
start_pred = pred[0]
end_pred = pred[1]
loss1 = criterion(start_pred.expand(1,810), start_index.expand(1))
loss2 = criterion(end_pred.expand(1,810), end_index.expand(1))
loss = loss1 + loss2
optimizer.zero_grad()
loss.backward()
sum_loss += loss.item()
optimizer.step()
print("Epoch---{} Loss---{}".format(epoch,sum_loss))
sum_loss = 0
The above code works fine if I am running it on the CPU. When I run it on GPU it gives a run time error where expected was a torch.FloatTensor but found type torch.cuda.FloatTensor.
So I had to edit the code to be like this
class BiDAF(nn.Module):
def __init__(self,char_embeddings,glove_model,device):
super(BiDAF,self).__init__()
self.device = device
self.context_highway = Context_Highway().to(device)
self.question_highway = Question_Highway().to(device)
self.char_embedding_dict = char_embeddings
self.glove_model = glove_model
self.tanh = nn.Tanh()
self.train_w = torch.randn(420,requires_grad = True).to(device)
self.w_p1 = torch.randn(700,requires_grad = True).to(device)
self.w_p2 = torch.randn(700,requires_grad = True).to(device)
self.bias = torch.randn(1,requires_grad = True).to(device)
self.conv1 = nn.Conv2d(1,5,kernel_size = (50,3))
self.conv2 = nn.Conv2d(1,5,kernel_size = (50,4))
self.conv3 = nn.Conv2d(1,3,kernel_size = (50,2))
self.conv4 = nn.Conv2d(1,7,kernel_size = (50,3))
self.context_bilstm = nn.LSTM(70, 70,bidirectional = True)
self.question_bilstm = nn.LSTM(70, 70,bidirectional = True)
self.bilstm_m1 = nn.LSTM(560, 70,bidirectional = True)
self.bilstm_m2 = nn.LSTM(140, 70,bidirectional = True)
...................................
bidaf = BiDAF(char_embeddings, glove_model, device)
bidaf = bidaf.to(device)
criterion = nn.CrossEntropyLoss()
lr = 0.01
optimizer = torch.optim.Adam(bidaf.parameters(),lr = lr)
Other than the variables inside the constructor, I had to move any local variables I initialized in the methods to GPU too. I noticed while running the code that the GPU one is slower than the CPU code. Is there any other way to move all the tensors to GPU?. Why is it slower than the CPU ?
I know I have only posted a portion of code, I didn’t want to make the post unnecessarily long. I hope this will be sufficient. Thanks for any help in advance.