Please help me to resolve this error

I am trying to implement sequence to sequence pytorch model on my dataset. The only difference is that my model work on words rather than sentences. So input will be a character.

from future import unicode_literals, print_function, division

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import test_1.preprocessing as tp

device = torch.device(“cuda” if torch.cuda.is_available() else “cpu”)

pre = tp.Preprocess(file_name = ‘data/file1.txt’)
root_word, inflected_word, features, max_length, vocabs = pre.file_reader()
print(max_length)
training_pair = pre.training_pair(root_word, inflected_word, features, max_length, source_reverse=False)

word_pair = [(training_pair[0][i],training_pair[1][i]) for i in range(len(training_pair[0]))]
MAX_LENGTH = max_length
print(‘Total %d Word pair…’ % len(word_pair))

# print(word_pair)

def tensorFromSentence(sentence_index):
return torch.tensor(sentence_index, dtype=torch.long, device=device).view(-1, 1)

def tensorsFromPair(pair):
input_tensor = tensorFromSentence(pair[0])
target_tensor = tensorFromSentence(pair[1])
return (input_tensor, target_tensor)

class EncoderRNN(nn.Module):
def init(self, input_size, hidden_size):
super(EncoderRNN, self).init()
self.hidden_size = hidden_size

self.embedding = nn.Embedding(input_size, hidden_size)
self.gru = nn.GRU(hidden_size, hidden_size)

def forward(self, input, hidden):
embedded = self.embedding(input).view(1, 1, -1)
output = embedded
output, hidden = self.gru(output, hidden)
return output, hidden

def initHidden(self):
return torch.zeros(1, 1, self.hidden_size, device=device)

class AttnDecoderRNN(nn.Module):
def init(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
super(AttnDecoderRNN, self).init()
self.hidden_size = hidden_size
self.output_size = output_size
self.dropout_p = dropout_p
self.max_length = max_length

self.embedding = nn.Embedding(self.output_size, self.hidden_size)
self.attn = nn.Linear(self.hidden_size 2, self.max_length)
self.attn_combine = nn.Linear(self.hidden_size
2, self.hidden_size)
self.dropout = nn.Dropout(self.dropout_p)
self.gru = nn.GRU(self.hidden_size, self.hidden_size)
self.out = nn.Linear(self.hidden_size, self.output_size)

def forward(self, input, hidden, encoder_outputs):
embedded = self.embedding(input).view(1, 1, -1)
embedded = self.dropout(embedded)

attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

output = torch.cat((embedded[0], attn_applied[0]), 1)
output = self.attn_combine(output).unsqueeze(0)

output = F.relu(output)
output, hidden = self.gru(output, hidden)

output = F.log_softmax(self.out(output[0]), dim=1)
return output, hidden, attn_weights

def initHidden(self):
return torch.zeros(1, 1, self.hidden_size, device=device)

teacher_forcing_ratio = 0.5

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
encoder_hidden = encoder.initHidden()

encoder_optimizer.zero_grad()
decoder_optimizer.zero_grad()

input_length = input_tensor.size(0)
target_length = target_tensor.size(0)

encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

loss = 0

for ei in range(input_length):
encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
encoder_outputs[ei] = encoder_output[0, 0]

decoder_input = torch.tensor([[pre.char_to_index[’<SOS>’]]], dtype=torch.long, device=device)

decoder_hidden = encoder_hidden

# use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
use_teacher_forcing = True

if use_teacher_forcing:
# Teacher forcing: Feed the target as the next input
for di in range(target_length):
decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
loss += criterion(decoder_output, target_tensor[di])
decoder_input = target_tensor[di] # Teacher forcing

else:
# Without teacher forcing: use its own predictions as the next input
for di in range(target_length):
decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
topv, topi = decoder_output.topk(1)
decoder_input = topi.squeeze().detach() # detach from history as input

loss += criterion(decoder_output, target_tensor[di])
if decoder_input.item() == pre.char_to_index[’<EOS>’]:
break

loss.backward()

encoder_optimizer.step()
decoder_optimizer.step()

return loss.item() / target_length

import time
import math

import matplotlib.pyplot as plt
# plt.switch_backend(‘agg’)
import matplotlib.ticker as ticker
import numpy as np

def asMinutes(s):
m = math.floor(s / 60)
s -= m * 60
return ‘%dm %ds’ % (m, s)

def timeSince(since, percent):
now = time.time()
s = now - since
es = s / (percent)
rs = es - s
return ‘%s (- %s)’ % (asMinutes(s), asMinutes(rs))

def showPlot(points):
plt.figure()
fig, ax = plt.subplots()
# this locator puts ticks at regular intervals
loc = ticker.MultipleLocator(base=0.2)
ax.yaxis.set_major_locator(loc)
plt.plot(points)

def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
start = time.time()
plot_losses = []
print_loss_total = 0 # Reset every print_every
plot_loss_total = 0 # Reset every plot_every

encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
training_pairs = [tensorsFromPair(word_pair[i]) for i in range(len(word_pair))]

criterion = nn.NLLLoss()

for iter in range(1, n_iters + 1):
training_pair = training_pairs[iter - 1]
input_tensor = training_pair[0]
target_tensor = training_pair[1]

loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
print_loss_total += loss
plot_loss_total += loss

if iter % print_every == 0:
print_loss_avg = print_loss_total / print_every
print_loss_total = 0
print(’%s (%d %d%%) %.4f’ % (timeSince(start, iter / n_iters), iter, iter / n_iters * 100, print_loss_avg))

if iter % plot_every == 0:
plot_loss_avg = plot_loss_total / plot_every
plot_losses.append(plot_loss_avg)
plot_loss_total = 0

showPlot(plot_losses)

hidden_size = 50
encoder1 = EncoderRNN(len(vocabs)-1, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, len(vocabs)-1, dropout_p=0.1).to(device)

print(encoder1)
print(attn_decoder1)

trainIters(encoder1, attn_decoder1, 10, print_every=2, plot_every=2)
.
.
.
.
.
Ouput:
+++++++++
Maximum Length of a word: 14
Dict is:
{’’: 1, ‘’: 2, ‘ह’: 3, ‘ी’: 4, ‘ू’: 5, ‘क’: 6, ‘र’: 7, ‘ा’: 8, ’ ': 9, ‘ब’: 10, ‘त’: 11, ‘ं’: 12, ‘ो’: 13, ‘न’: 14, ‘म’: 15, ‘ु’: 16, ‘ज’: 17, ‘स’: 18, ‘ँ’: 19, ‘ग’: 20, ‘ै’: 21, ‘घ’: 22, ‘प’: 23, ‘च’: 24, ‘ल’: 25, ‘े’: 26, ‘उ’: 27}
Character and feature dict Ready…
14
Total 5 Word pair…
EncoderRNN(
(embedding): Embedding(26, 50)
(gru): GRU(50, 50)
)
AttnDecoderRNN(
(embedding): Embedding(26, 50)
(attn): Linear(in_features=100, out_features=14, bias=True)
(attn_combine): Linear(in_features=100, out_features=50, bias=True)
(dropout): Dropout(p=0.1)
(gru): GRU(50, 50)
(out): Linear(in_features=50, out_features=26, bias=True)
)
Traceback (most recent call last):
File “/home/rajesh/@Pycharm/morph_reinflection/test_1/temp.py”, line 217, in
trainIters(encoder1, attn_decoder1, 10, print_every=2, plot_every=2)
File “/home/rajesh/@Pycharm/morph_reinflection/test_1/temp.py”, line 194, in trainIters
loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
File “/home/rajesh/@Pycharm/morph_reinflection/test_1/temp.py”, line 109, in train
encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
File “/home/rajesh/@Pycharm/morph_reinflection/conda_venv/lib/python3.6/site-packages/torch/nn/modules/module.py”, line 491, in call
result = self.forward(*input, **kwargs)
File “/home/rajesh/@Pycharm/morph_reinflection/test_1/temp.py”, line 48, in forward
output, hidden = self.gru(output, hidden)
File “/home/rajesh/@Pycharm/morph_reinflection/conda_venv/lib/python3.6/site-packages/torch/nn/modules/module.py”, line 491, in call
result = self.forward(*input, **kwargs)
File “/home/rajesh/@Pycharm/morph_reinflection/conda_venv/lib/python3.6/site-packages/torch/nn/modules/rnn.py”, line 192, in forward
output, hidden = func(input, self.all_weights, hx, batch_sizes)
File “/home/rajesh/@Pycharm/morph_reinflection/conda_venv/lib/python3.6/site-packages/torch/nn/_functions/rnn.py”, line 323, in forward
return func(input, *fargs, **fkwargs)
File “/home/rajesh/@Pycharm/morph_reinflection/conda_venv/lib/python3.6/site-packages/torch/nn/_functions/rnn.py”, line 287, in forward
dropout_ts)
RuntimeError: CUDNN_STATUS_EXECUTION_FAILED