Chatbot Tutorial problem

715456786b674e5bf49e · November 8, 2018, 9:30am

Hi,

I’m new to pytorch and have been following the many tutorials available.
But, When I did The CHATBOT TUTORIAL
(Chatbot Tutorial — PyTorch Tutorials 2.2.0+cu121 documentation) is not work.

Like the figure below

What should I do and what is causing this?

USE_CUDA = torch.cuda.is_available()
device = torch.device(“cuda” if USE_CUDA else “cpu”)

corpus_name = “cornell movie-dialogs corpus”
corpus = os.path.join(“data”, corpus_name)

def printLines(file, n=10):
with open(file, ‘rb’) as datafile:
lines = datafile.readlines()
for line in lines[:n]:
print(line)

#printLines(os.path.join(corpus, “movie_lines.txt”))

Splits each line of the file into a dictionary of fields

def loadLines(fileName, fields):
lines = {}
with open(fileName, ‘r’, encoding=‘iso-8859-1’) as f:
for line in f:
values = line.split(" +++$+++ ")
# Extract fields
lineObj = {}
for i, field in enumerate(fields):
lineObj[field] = values[i]
lines[lineObj[‘lineID’]] = lineObj
return lines

Groups fields of lines from `loadLines` into conversations based on movie_conversations.txt

def loadConversations(fileName, lines, fields):
conversations =
with open(fileName, ‘r’, encoding=‘iso-8859-1’) as f:
for line in f:
values = line.split(" +++$+++ ")
# Extract fields
convObj = {}
for i, field in enumerate(fields):
convObj[field] = values[i]
# Convert string to list (convObj[“utteranceIDs”] == “[‘L598485’, ‘L598486’, …]”)
lineIds = eval(convObj[“utteranceIDs”])
# Reassemble lines
convObj[“lines”] =
for lineId in lineIds:
convObj[“lines”].append(lines[lineId])
conversations.append(convObj)
return conversations

Extracts pairs of sentences from conversations

def extractSentencePairs(conversations):
qa_pairs =
for conversation in conversations:
# Iterate over all the lines of the conversation
for i in range(len(conversation[“lines”]) - 1): # We ignore the last line (no answer for it)
inputLine = conversation[“lines”][i][“text”].strip()
targetLine = conversation[“lines”][i+1][“text”].strip()
# Filter wrong samples (if one of the lists is empty)
if inputLine and targetLine:
qa_pairs.append([inputLine, targetLine])
return qa_pairs

datafile = os.path.join(corpus, “formatted_movie_lines.txt”)

delimiter = ‘\t’

Unescape the delimiter

delimiter = str(codecs.decode(delimiter, “unicode_escape”))

lines = {}
conversations =
MOVIE_LINES_FIELDS = [“lineID”, “characterID”, “movieID”, “character”, “text”]
MOVIE_CONVERSATIONS_FIELDS = [“character1ID”, “character2ID”, “movieID”, “utteranceIDs”]

Load lines and process conversations

print(“\nProcessing corpus…”)
lines = loadLines(os.path.join(corpus, “movie_lines.txt”), MOVIE_LINES_FIELDS)
print(“\nLoading conversations…”)
conversations = loadConversations(os.path.join(corpus, “movie_conversations.txt”),
lines, MOVIE_CONVERSATIONS_FIELDS)

Write new csv file

print(“\nWriting newly formatted file…”)
with open(datafile, ‘w’, encoding=‘utf-8’) as outputfile:
writer = csv.writer(outputfile, delimiter=delimiter)
for pair in extractSentencePairs(conversations):
writer.writerow(pair)

Print a sample of lines

print(“\nSample lines from file:”)
printLines(datafile)

Default word tokens

PAD_token = 0 # Used for padding short sentences
SOS_token = 1 # Start-of-sentence token
EOS_token = 2 # End-of-sentence token

class Voc:
def init(self, name):
self.name = name
self.trimmed = False
self.word2index = {}
self.word2count = {}
self.index2word = {PAD_token: “PAD”, SOS_token: “SOS”, EOS_token: “EOS”}
self.num_words = 3 # Count SOS, EOS, PAD

def addSentence(self, sentence):
    for word in sentence.split(' '):
        self.addWord(word)

def addWord(self, word):
    if word not in self.word2index:
        self.word2index[word] = self.num_words
        self.word2count[word] = 1
        self.index2word[self.num_words] = word
        self.num_words += 1
    else:
        self.word2count[word] += 1

# Remove words below a certain count threshold
def trim(self, min_count):
    if self.trimmed:
        return
    self.trimmed = True

    keep_words = []

    for k, v in self.word2count.items():
        if v >= min_count:
            keep_words.append(k)

    print('keep_words {} / {} = {:.4f}'.format(
        len(keep_words), len(self.word2index), len(keep_words) / len(self.word2index)
    ))

    # Reinitialize dictionaries
    self.word2index = {}
    self.word2count = {}
    self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
    self.num_words = 3 # Count default tokens

    for word in keep_words:
        self.addWord(word)

MAX_LENGTH = 10 # Maximum sentence length to consider

Turn a Unicode string to plain ASCII, thanks to

What is the best way to remove accents (normalize) in a Python unicode string? - Stack Overflow

def unicodeToAscii(s):
return ‘’.join(
c for c in unicodedata.normalize(‘NFD’, s)
if unicodedata.category(c) != ‘Mn’
)

Lowercase, trim, and remove non-letter characters

def normalizeString(s):
s = unicodeToAscii(s.lower().strip())
s = re.sub(r"([.!?])“, r” \1", s)
s = re.sub(r"[^a-zA-Z.!?]+“, r” “, s)
s = re.sub(r”\s+“, r” ", s).strip()
return s

Read query/response pairs and return a voc object

def readVocs(datafile, corpus_name):
print(“Reading lines…”)
# Read the file and split into lines
lines = open(datafile, encoding=‘utf-8’).
read().strip().split(‘\n’)
# Split every line into pairs and normalize
pairs = [[normalizeString(s) for s in l.split(‘\t’)] for l in lines]
voc = Voc(corpus_name)
return voc, pairs

def filterPair(p):
# Input sequences need to preserve the last word for EOS token
return len(p[0].split(’ ‘)) < MAX_LENGTH and len(p[1].split(’ ')) < MAX_LENGTH

Filter pairs using filterPair condition

def filterPairs(pairs):
return [pair for pair in pairs if filterPair(pair)]

Using the functions defined above, return a populated voc object and pairs list

def loadPrepareData(corpus, corpus_name, datafile, save_dir):
print(“Start preparing training data …”)
voc, pairs = readVocs(datafile, corpus_name)
print(“Read {!s} sentence pairs”.format(len(pairs)))
pairs = filterPairs(pairs)
print(“Trimmed to {!s} sentence pairs”.format(len(pairs)))
print(“Counting words…”)
for pair in pairs:
voc.addSentence(pair[0])
voc.addSentence(pair[1])
print(“Counted words:”, voc.num_words)
return voc, pairs

save_dir = os.path.join(“data”, “save”)
voc, pairs = loadPrepareData(corpus, corpus_name, datafile, save_dir)

Print some pairs to validate

print(“\npairs:”)
for pair in pairs[:10]:
print(pair)

File “”, line 1, in
runfile(‘C:/Users/lab723/Desktop/glove1005/untitled0.py’, wdir=‘C:/Users/lab723/Desktop/glove1005’)

File “C:\Users\lab723\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py”, line 705, in runfile
execfile(filename, namespace)

File “C:\Users\lab723\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py”, line 102, in execfile
exec(compile(f.read(), filename, ‘exec’), namespace)

File “C:/Users/lab723/Desktop/glove1005/untitled0.py”, line 219, in
voc, pairs = loadPrepareData(corpus, corpus_name, datafile, save_dir)

File “C:/Users/lab723/Desktop/glove1005/untitled0.py”, line 209, in loadPrepareData
pairs = filterPairs(pairs)

File “C:/Users/lab723/Desktop/glove1005/untitled0.py”, line 202, in filterPairs
return [pair for pair in pairs if filterPair(pair)]

File “C:/Users/lab723/Desktop/glove1005/untitled0.py”, line 202, in
return [pair for pair in pairs if filterPair(pair)]

File “C:/Users/lab723/Desktop/glove1005/untitled0.py”, line 198, in filterPair
return len(p[0].split(’ ‘)) < MAX_LENGTH and len(p[1].split(’ ')) < MAX_LENGTH

IndexError: list index out of range

Brando_Miranda · July 18, 2019, 4:26pm

I say this with my best interest in mind, but nobody is going to be able to help you with this. Make the question shorter or at least write a more meaningful title…short concise title. No one wants to read a novel. We want to help but your not making it easy for us…