Hi
I have a neural network in which I am trying to implement multi-GPU capability with
model=torch.nn.DataParallel(model, device_ids=[0])
(Currently I’m trying to get it to work on a computer with only one GPU, hence device_ids=[0].)
But I get the error:
RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.
Stemming from:
loss.backward()
Worth noting, without implementing multi-GPU the code works fine with no errors.
(To implement multi-GPU I also had to switch some lines of code from model.something, to model.module.something, which could be the source of the problem)
The full code, in all its chaos:
import os
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-0.3.1-py36_cuda80_cudnn6he774522_2\Lib\site-packages')
import torch
import torch.nn as nn
from torch import optim
import torch.autograd as autograd
import time, random
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\tqdm-master')
from tqdm import tqdm
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-sentiment-classification-master')
import bilstm
import lstm
from lstm import LSTMSentiment
from bilstm import BiLSTMSentiment
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\text-master')
from torchtext import data
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z')
import numpy as np
import argparse
import csv
import time
#import gensim
# Fix the CPU thread count and seed both RNGs so that runs are repeatable.
# (The original wrapped this in a single-iteration `for x in range(1):` loop,
# which has no effect on what gets executed.)
torch.set_num_threads(8)
torch.manual_seed(1)
random.seed(1)
#########################################################################
cuda = True
Difficulty = 'easy'
#########################################################################
#if Difficulty == 'easy':
def load_bin_vec(fname, vocab):
    """
    Load word vectors from a binary word2vec file (Google/Mikolov format).

    The file starts with a text header line holding two integers (number of
    words and vector length). Each record is then: the word, a single space,
    and that many float32 values as raw bytes. Newline bytes in front of a
    word are skipped.

    Args:
        fname: Path of the binary word2vec file.
        vocab: Container supporting ``in``; only words found in it are kept.

    Returns:
        dict mapping each kept word to a writable 1-D float32 numpy array.

    Raises:
        EOFError: If the file ends in the middle of a record. (Previously a
            truncated word made the read loop spin forever.)
    """
    word_vecs = {}
    with open(fname, "rb") as f:
        header = f.readline()
        vocab_size, layer1_size = map(int, header.split())
        binary_len = np.dtype('float32').itemsize * layer1_size
        for _ in range(vocab_size):
            chars = bytearray()
            while True:
                ch = f.read(1)
                if not ch:
                    # read() returns b'' at end of file and never again ' '.
                    raise EOFError('unexpected end of file while reading a word in ' + str(fname))
                if ch == b' ':
                    break
                if ch != b'\n':
                    chars += ch
            # latin-1 maps every byte to one character, same as decoding
            # the bytes one at a time.
            word = chars.decode('latin-1')
            raw = f.read(binary_len)
            if len(raw) != binary_len:
                raise EOFError('unexpected end of file while reading a vector in ' + str(fname))
            if word in vocab:
                # frombuffer replaces the deprecated binary mode of
                # np.fromstring; copy() keeps the result writable.
                word_vecs[word] = np.frombuffer(raw, dtype='float32').copy()
    return word_vecs
#if Difficulty == 'hard':
# def load_bin_vec(fname, vocab):
# count = 0
# success = 0
# """
# Loads 300x1 word vecs from Google (Mikolov) word2vec
# """
# word_vecs = {}
# w2v_model = gensim.models.KeyedVectors.load_word2vec_format(fname, binary=True)
#
# from gensim.models import word2vec
# w2v_model = word2vec.Word2Vec.load(fname)
# print(w2v_model.similarity('film', 'movie'))
#
# for word in vocab:
# print(word)
# here = 0
# find = 0
# for letter in word:
# here = here + 1
# if letter == ':':
# find = here
# try:
# word_vecs[word] = w2v_model[word[0:find]]
# success = success + 1
# except KeyError:
# count = count + 1
# some tokens from the dataset may not appear as an entry in the word embeds matrix, so i print and skip them
# print('Key error: {}'.format(word))
# continue
# print(count, 'problematic words', success, 'successful words')
# return word_vecs
def get_accuracy(truth, pred):
    """
    Return the fraction of positions where truth and pred agree.

    Args:
        truth: Sequence of gold labels.
        pred: Sequence of predicted labels, same length as truth.

    Returns:
        A float in [0, 1]; 0.0 when both sequences are empty (the original
        divided by zero in that case).

    Raises:
        AssertionError: If the two sequences differ in length.
    """
    assert len(truth) == len(pred), 'truth and pred must have the same length'
    if len(truth) == 0:
        return 0.0
    right = sum(1.0 for t, p in zip(truth, pred) if t == p)
    return right / len(truth)
def train_epoch_progress(model, train_iter, loss_function, optimizer, text_field, label_field, epoch):
    """Train for one epoch over train_iter, showing a tqdm progress bar.

    Returns an (avg_loss, acc) tuple: the summed per-batch loss divided by
    len(train_iter), and get_accuracy over every example seen.
    text_field and label_field are accepted but not used in this function.
    """
    model.train()
    avg_loss = 0.0
    truth_res = []
    pred_res = []
    count = 0
    for batch in tqdm(train_iter, desc='Train epoch '+str(epoch+1)):
        sent, label = batch.text, batch.label
        # Shift the label indices down by one, in place.
        label.data.sub_(1)
        truth_res += list(label.data)
        # NOTE(review): `model` exposes `.module` (it is the DataParallel
        # wrapper), so the four assignments below set attributes on the
        # wrapper, not on model.module. The forward pass further down is
        # called on model.module, whose own batch_size/hidden are therefore
        # never updated. Presumably the wrapped network reuses its stale
        # self.hidden, which would explain the "backward through the graph a
        # second time" error -- confirm against LSTMSentiment.
        model.batch_size = len(label.data)
        model.epoch = epoch+1
        model.hidden = model.module.init_hidden()
        model.count = count
        pred = model.module(sent)
        # Index of the largest score along dim 1 is the predicted label.
        pred_label = pred.data.max(1)[1]
        pred_res += [x for x in pred_label]
        model.zero_grad()
        loss = loss_function(pred, label)
        avg_loss += loss.data[0]
        count += 1
        loss.backward()
        optimizer.step()
    avg_loss /= len(train_iter)
    acc = get_accuracy(truth_res, pred_res)
    # print(model.hidden)
    return avg_loss, acc
def train_epoch(model, train_iter, loss_function, optimizer):
    """Train for one epoch over train_iter without a progress bar.

    Returns an (avg_loss, acc) tuple: the summed per-batch loss divided by
    len(train_iter), and get_accuracy over every example seen.
    """
    model.train()
    avg_loss = 0.0
    truth_res = []
    pred_res = []
    count = 0
    for batch in train_iter:
        sent, label = batch.text, batch.label
        # Shift the label indices down by one, in place.
        label.data.sub_(1)
        truth_res += list(label.data)
        # NOTE(review): these two attributes are set on `model` (the
        # wrapper that exposes `.module`), while the forward pass below
        # runs on model.module -- the wrapped network's own batch_size and
        # hidden are not updated here.
        model.batch_size = len(label.data)
        model.hidden = model.module.init_hidden()
        pred = model.module(sent)
        # NOTE(review): .numpy() is only available on CPU tensors; with
        # CUDA tensors this presumably needs .cpu() first -- confirm.
        pred_label = pred.data.max(1)[1].numpy()
        pred_res += [x for x in pred_label]
        model.zero_grad()
        loss = loss_function(pred, label)
        avg_loss += loss.data[0]
        # count is incremented but never read in this function.
        count += 1
        loss.backward()
        optimizer.step()
    avg_loss /= len(train_iter)
    acc = get_accuracy(truth_res, pred_res)
    return avg_loss, acc
def evaluate(model, data, loss_function, name):
    """Evaluate model on every batch of data and print loss and accuracy.

    `name` is the prefix of the printed line. Returns the accuracy computed
    by get_accuracy. Inside this function the `data` parameter shadows any
    module-level name `data`.
    """
    model.eval()
    avg_loss = 0.0
    truth_res = []
    pred_res = []
    for batch in data:
        sent, label = batch.text, batch.label
        # Shift the label indices down by one, in place.
        label.data.sub_(1)
        truth_res += list(label.data)
        # NOTE(review): set on `model` (the wrapper exposing `.module`),
        # while the forward pass below runs on model.module.
        model.batch_size = len(label.data)
        model.hidden = model.module.init_hidden()
        pred = model.module(sent)
        # Index of the largest score along dim 1 is the predicted label.
        pred_label = pred.data.max(1)[1]
        pred_res += [x for x in pred_label]
        loss = loss_function(pred, label)
        avg_loss += loss.data[0]
    avg_loss /= len(data)
    acc = get_accuracy(truth_res, pred_res)
    print(name + ': loss %.2f acc %.1f' % (avg_loss, acc*100))
    return acc
def load_sst(text_field, label_field, batch_size, Difficulty, cuda):
    """
    Load the train/dev/test TSV splits for a difficulty level and build iterators.

    Changes the working directory to the dataset's base folder, reads the
    three TSV files, builds the text and label vocabularies from all three
    splits, and wraps the splits in BucketIterators.

    Args:
        text_field: torchtext field used for the 'text' column.
        label_field: torchtext field used for the 'label' column.
        batch_size: Batch size of the training iterator; dev and test are
            each served as one single batch.
        Difficulty: One of 'easy', 'medium' or 'hard'.
        cuda: Truthy to pass device=None to the iterators, otherwise
            device=-1 is passed.

    Returns:
        (train_iter, dev_iter, test_iter)

    Raises:
        ValueError: If Difficulty is not a known level (the original fell
            through and failed later on an unbound local).
    """
    # difficulty -> (working directory, dataset path, train, validation, test)
    sources = {
        'easy': (r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-sentiment-classification-master copy/data',
                 './SST2/', 'train.tsv', 'dev.tsv', 'test.tsv'),
        # NOTE(review): 'medium' validates on test.tsv, as in the original.
        'medium': (r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-sentiment-classification-master',
                   './data/Dataset/', 'train.tsv', 'test.tsv', 'test.tsv'),
        'hard': (r'C:\Users\john\Desktop\Deep_Learning_A_Z',
                 './aclImdb/', 'usable_train.tsv', 'usable_dev.tsv', 'usable_test.tsv'),
    }
    if Difficulty not in sources:
        raise ValueError('unknown Difficulty: {!r}'.format(Difficulty))
    workdir, path, train_file, dev_file, test_file = sources[Difficulty]

    os.chdir(workdir)
    train, dev, test = data.TabularDataset.splits(
        path=path, train=train_file, validation=dev_file, test=test_file,
        format='tsv', fields=[('text', text_field), ('label', label_field)])
    text_field.build_vocab(train, dev, test)
    label_field.build_vocab(train, dev, test)

    # The two original branches differed only in the device argument.
    device = None if cuda else -1
    train_iter, dev_iter, test_iter = data.BucketIterator.splits(
        (train, dev, test),
        batch_sizes=(batch_size, len(dev), len(test)),
        sort_key=lambda x: len(x.text), repeat=False, device=device)
    return train_iter, dev_iter, test_iter
#def adjust_learning_rate(learning_rate, optimizer, epoch):
# lr = learning_rate * (0.9 ** (epoch //1))
# for param_group in optimizer.param_groups:
# param_group['lr'] = lr
# return optimizer
#for clustering
#if Difficulty == 'easy':
# EPOCHS= 30
# BATCH_SIZE = 30
# HIDDEN_DIM = 150
# Per-difficulty hyper-parameters: number of epochs, training batch size and
# LSTM hidden size.
if Difficulty == 'easy':
    EPOCHS, BATCH_SIZE, HIDDEN_DIM = 1000, 5, 100
elif Difficulty == 'medium':
    EPOCHS, BATCH_SIZE, HIDDEN_DIM = 30, 5, 50
elif Difficulty == 'hard':
    EPOCHS, BATCH_SIZE, HIDDEN_DIM = 30, 5, 50
# Alternative hidden sizes tried for 'easy' and 'medium':
#HIDDEN_DIM = round((150*(4/6)))
#HIDDEN_DIM = round((150*(4/5)))
#HIDDEN_DIM = round((150*(4/8)))
# NOTE(review): USE_GPU is not read anywhere in the visible code; the `cuda`
# flag is what the rest of the script checks.
USE_GPU = torch.cuda.is_available()
# Width of each embedding row.
EMBEDDING_DIM = 300
# Used to name this run's output directory.
timestamp = str(int(time.time()))
best_dev_acc = 0.0
#B_sizes = np.ones(BATCH_SIZE) #BATCH SIZE MUST BE MINIMUM OF 50
#for x in range(BATCH_SIZE):
# B_sizes[x]= 10*(.1*x*(torch.cos(torch.FloatTensor([x]))*torch.sin(torch.FloatTensor([2*x])))+.08*x).round() + 1
#B_sizes = max(B_sizes)
#def weights_init(LSTMSentiment):
# classname = LSTMSentiment.__class__.__name__
# if classname.find('LSTMSentiment') != -1:
# LSTMSentiment.weight.data.fill_(.5)
# LSTMSentiment.bias.data.fill_(0)
#Using = BiLSTMSentiment
# Model class to instantiate.
Using = LSTMSentiment
if cuda:
    # Make newly created float tensors CUDA tensors by default.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
print('Using: ', Using)
text_field = data.Field(lower=True)
label_field = data.Field(sequential=False)
train_iter, dev_iter, test_iter = load_sst(text_field, label_field, BATCH_SIZE, Difficulty = Difficulty, cuda = cuda)
# NOTE(review): this module-level count is not read in the visible code.
count = 0
# label_size is one less than the label vocabulary size; the train/evaluate
# functions shift the label indices down by one to match.
model = Using(embedding_dim=EMBEDDING_DIM, hidden_dim=HIDDEN_DIM, vocab_size=len(text_field.vocab),
              use_gpu=cuda, label_size=len(label_field.vocab)-1, batch_size=BATCH_SIZE)
if cuda:
    model = model.cuda()
print('Load word embeddings...')
# # glove
#text_field.vocab.load_vectors('glove.6B.100d')
#text_field.vocab.load_vectors('hard_dataset_embeddings')
# word2vector
# Build the initial embedding matrix: random rows, overwritten with the
# pretrained word2vec vector for every vocabulary word that has one.
word_to_idx = text_field.vocab.stoi
pretrained_embeddings = np.random.uniform(-0.25, 0.25, (len(text_field.vocab), EMBEDDING_DIM))
pretrained_embeddings[0] = 0
#if Difficulty == 'easy':
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-sentiment-classification-master')
word2vec = load_bin_vec('./data/GoogleNews-vectors-negative300.bin', word_to_idx)
#if Difficulty == 'hard':
# os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z')
# word2vec = load_bin_vec('hard_dataset_embeddings', word_to_idx)
for word, vector in word2vec.items():
    # Row i of the matrix belongs to vocabulary index i. The original wrote
    # to row word_to_idx[word]-1, which stores every vector one row too early
    # (assuming the model looks tokens up by their vocab index -- confirm
    # against LSTMSentiment).
    pretrained_embeddings[word_to_idx[word]] = vector
# text_field.vocab.load_vectors(wv_type='', wv_dim=300)
model.embeddings.weight.data.copy_(torch.from_numpy(pretrained_embeddings))
# model.embeddings.weight.data = text_field.vocab.vectors
# model.embeddings.embed.weight.requires_grad = False
optimizer = optim.Adam(model.parameters(), lr=1e-3)
#optimizer = optim.Adam(model.parameters(), lr=1e-2,weight_decay=1e-5)
loss_function = nn.NLLLoss()
#loss_function = nn.CrossEntropyLoss()
model=torch.nn.DataParallel(model, device_ids=[0])
# Set after wrapping: evaluate() accesses best_model.module.
best_model = model
# Weights of the best epoch so far. `best_model = model` on its own is only
# an alias of the live model, so without this snapshot the "best" model would
# always be the latest one.
best_state = None
print('Training...')
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
time_start = time.time()
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
best_model_path = os.path.join(out_dir, 'best_model' + '.pth')
for epoch in range(EPOCHS):
    # batch_modifer = epoch + 1
    avg_loss, acc = train_epoch_progress(model, train_iter, loss_function, optimizer, text_field, label_field, epoch)
    tqdm.write('Train: loss %.2f acc %.1f' % (avg_loss, acc*100))
    dev_acc = evaluate(model, dev_iter, loss_function, 'Dev')
    if dev_acc > best_dev_acc:
        # Portable replacement for os.system('rm ...'); saving is currently
        # commented out below, so the file normally does not exist.
        if best_dev_acc > 0 and os.path.exists(best_model_path):
            os.remove(best_model_path)
        best_dev_acc = dev_acc
        best_state = {key: value.clone() for key, value in model.state_dict().items()}
        # torch.save(best_model.state_dict(), out_dir + '/best_model' + '.pth')
        # evaluate on test with the best dev performance model
        # (at this point the live model IS the best model)
        test_acc = evaluate(best_model, test_iter, loss_function, 'Test')
# Final dev score of the last-epoch model, taken before restoring weights.
dev_acc = evaluate(model, dev_iter, loss_function, 'Dev')
if best_state is not None:
    model.load_state_dict(best_state)
test_acc = evaluate(best_model, test_iter, loss_function, 'Final Test')
time_finish = time.time()
# Elapsed wall-clock time in hours.
total_time = (time_finish-time_start)/3600
print('total time', total_time, 'hours')
# Write dev accuracy, test accuracy and run time (hours) to the first unused
# out_<n>.csv file in the output directory.
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-sentiment-classification-master/output/')
ID = next((x for x in range(1001) if not os.path.isfile('out_'+str(x)+'.csv')), None)
if ID is None:
    # The original left ID unbound here and failed with a NameError.
    raise RuntimeError('no free out_<n>.csv slot in range 0..1000')
# np.savetxt returns None, so wrapping it in eval(str(...)) did nothing.
np.savetxt('out_'+str(ID)+'.csv', np.array([dev_acc,test_acc,total_time]), delimiter=',')
#eval("np.savetxt('out_pm_%i.csv', np.array([dev_acc,test_acc,total_time]), delimiter=',')" %id)
Any help would be greatly appreciated!
Thank you!
UPDATE:
The error goes away when I pass retain_graph=True to the two loss.backward() calls.
However, this causes the time per epoch to explode from 30 seconds to 20 minutes (understandably, considering what this command does). The whole thing I’m trying to achieve with the multi-GPU’s is faster run-times, so this is a problem.
Does someone perhaps know why the model=torch.nn.DataParallel(model, device_ids=[0]) line is forcing me to set retain_graph=True?
I suspect this has something to do with the fact that I had to turn model.init_hidden and model(sent) into model.module.init_hidden and model.module(sent).
But without that addition, I was getting errors like:
AttributeError: 'DataParallel' object has no attribute 'init_hidden'
So I’m really not sure where to go from here…
SOLVED: (I think)
For anyone who is having a similar issue, my solution was to scrap retain_graph=True entirely and simply change almost every model.something to model.module.something (including the attribute assignments such as model.hidden and model.batch_size).
WORKING CODE:
import os
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-0.3.1-py36_cuda80_cudnn6he774522_2\Lib\site-packages')
import torch
import torch.nn as nn
from torch import optim
import torch.autograd as autograd
import time, random
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\tqdm-master')
from tqdm import tqdm
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-sentiment-classification-master')
import bilstm
import lstm
from lstm import LSTMSentiment
from bilstm import BiLSTMSentiment
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\text-master')
from torchtext import data
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z')
import numpy as np
import argparse
import csv
import time
#import gensim
# Fix the CPU thread count and seed both RNGs so that runs are repeatable.
# (The original wrapped this in a single-iteration `for x in range(1):` loop,
# which has no effect on what gets executed.)
torch.set_num_threads(8)
torch.manual_seed(1)
random.seed(1)
#########################################################################
cuda = True
Difficulty = 'easy'
#########################################################################
#if Difficulty == 'easy':
def load_bin_vec(fname, vocab):
    """
    Load word vectors from a binary word2vec file (Google/Mikolov format).

    The file starts with a text header line holding two integers (number of
    words and vector length). Each record is then: the word, a single space,
    and that many float32 values as raw bytes. Newline bytes in front of a
    word are skipped.

    Args:
        fname: Path of the binary word2vec file.
        vocab: Container supporting ``in``; only words found in it are kept.

    Returns:
        dict mapping each kept word to a writable 1-D float32 numpy array.

    Raises:
        EOFError: If the file ends in the middle of a record. (Previously a
            truncated word made the read loop spin forever.)
    """
    word_vecs = {}
    with open(fname, "rb") as f:
        header = f.readline()
        vocab_size, layer1_size = map(int, header.split())
        binary_len = np.dtype('float32').itemsize * layer1_size
        for _ in range(vocab_size):
            chars = bytearray()
            while True:
                ch = f.read(1)
                if not ch:
                    # read() returns b'' at end of file and never again ' '.
                    raise EOFError('unexpected end of file while reading a word in ' + str(fname))
                if ch == b' ':
                    break
                if ch != b'\n':
                    chars += ch
            # latin-1 maps every byte to one character, same as decoding
            # the bytes one at a time.
            word = chars.decode('latin-1')
            raw = f.read(binary_len)
            if len(raw) != binary_len:
                raise EOFError('unexpected end of file while reading a vector in ' + str(fname))
            if word in vocab:
                # frombuffer replaces the deprecated binary mode of
                # np.fromstring; copy() keeps the result writable.
                word_vecs[word] = np.frombuffer(raw, dtype='float32').copy()
    return word_vecs
#if Difficulty == 'hard':
# def load_bin_vec(fname, vocab):
# count = 0
# success = 0
# """
# Loads 300x1 word vecs from Google (Mikolov) word2vec
# """
# word_vecs = {}
# w2v_model = gensim.models.KeyedVectors.load_word2vec_format(fname, binary=True)
#
# from gensim.models import word2vec
# w2v_model = word2vec.Word2Vec.load(fname)
# print(w2v_model.similarity('film', 'movie'))
#
# for word in vocab:
# print(word)
# here = 0
# find = 0
# for letter in word:
# here = here + 1
# if letter == ':':
# find = here
# try:
# word_vecs[word] = w2v_model[word[0:find]]
# success = success + 1
# except KeyError:
# count = count + 1
# some tokens from the dataset may not appear as an entry in the word embeds matrix, so i print and skip them
# print('Key error: {}'.format(word))
# continue
# print(count, 'problematic words', success, 'successful words')
# return word_vecs
def get_accuracy(truth, pred):
    """
    Return the fraction of positions where truth and pred agree.

    Args:
        truth: Sequence of gold labels.
        pred: Sequence of predicted labels, same length as truth.

    Returns:
        A float in [0, 1]; 0.0 when both sequences are empty (the original
        divided by zero in that case).

    Raises:
        AssertionError: If the two sequences differ in length.
    """
    assert len(truth) == len(pred), 'truth and pred must have the same length'
    if len(truth) == 0:
        return 0.0
    right = sum(1.0 for t, p in zip(truth, pred) if t == p)
    return right / len(truth)
def train_epoch_progress(model, train_iter, loss_function, optimizer, text_field, label_field, epoch):
    """
    Run one training epoch with a tqdm progress bar.

    For every batch the wrapped network (model.module) gets its batch_size,
    epoch, hidden state and batch counter refreshed, then a forward pass,
    loss, backward pass and optimizer step are performed.

    NOTE(review): the forward pass is invoked on model.module directly, so
    the DataParallel wrapper's own forward is not called here.

    Args:
        model: Wrapper exposing the network as `.module`.
        train_iter: Iterable of batches with `.text` and `.label`.
        loss_function: Callable(pred, label) returning the loss.
        optimizer: Optimizer stepped once per batch.
        text_field, label_field: Unused in this function.
        epoch: Zero-based epoch index (shown and stored as epoch + 1).

    Returns:
        (mean loss per batch, accuracy over all examples seen)
    """
    model.train()
    net = model.module
    loss_sum = 0.0
    gold = []
    guessed = []
    progress = tqdm(train_iter, desc='Train epoch '+str(epoch+1))
    for step, batch in enumerate(progress):
        sent = batch.text
        label = batch.label
        # Labels are shifted down by one, in place.
        label.data.sub_(1)
        gold.extend(label.data)
        net.batch_size = len(label.data)
        net.epoch = epoch + 1
        net.hidden = net.init_hidden()
        net.count = step
        pred = net(sent)
        # Index of the largest score along dim 1 is the predicted label.
        guessed.extend(pred.data.max(1)[1])
        net.zero_grad()
        loss = loss_function(pred, label)
        loss_sum += loss.data[0]
        loss.backward()
        optimizer.step()
    # print(model.hidden)
    return loss_sum / len(train_iter), get_accuracy(gold, guessed)
def train_epoch(model, train_iter, loss_function, optimizer):
    """
    Run one training epoch without a progress bar.

    Args:
        model: Wrapper exposing the network as `.module`.
        train_iter: Iterable of batches with `.text` and `.label`.
        loss_function: Callable(pred, label) returning the loss.
        optimizer: Optimizer stepped once per batch.

    Returns:
        (mean loss per batch, accuracy over all examples seen)
    """
    model.train()
    avg_loss = 0.0
    truth_res = []
    pred_res = []
    for batch in train_iter:
        sent, label = batch.text, batch.label
        # Labels are shifted down by one, in place.
        label.data.sub_(1)
        truth_res += list(label.data)
        model.module.batch_size = len(label.data)
        model.module.hidden = model.module.init_hidden()
        pred = model.module(sent)
        # .numpy() only works on CPU tensors; .cpu() is a no-op for tensors
        # that already live on the CPU, so this works with and without CUDA.
        pred_label = pred.data.max(1)[1].cpu().numpy()
        pred_res += list(pred_label)
        model.module.zero_grad()
        loss = loss_function(pred, label)
        avg_loss += loss.data[0]
        loss.backward()
        optimizer.step()
    avg_loss /= len(train_iter)
    acc = get_accuracy(truth_res, pred_res)
    return avg_loss, acc
def evaluate(model, data, loss_function, name):
    """
    Score the model on every batch of `data` and print loss and accuracy.

    Args:
        model: Wrapper exposing the network as `.module`.
        data: Iterable of batches with `.text` and `.label` (this parameter
            shadows any module-level name `data` inside the function).
        loss_function: Callable(pred, label) returning the loss.
        name: Prefix of the printed result line.

    Returns:
        Accuracy over all evaluated examples.
    """
    model.eval()
    net = model.module
    gold, guessed = [], []
    loss_sum = 0.0
    for batch in data:
        sent = batch.text
        label = batch.label
        # Labels are shifted down by one, in place.
        label.data.sub_(1)
        gold.extend(label.data)
        net.batch_size = len(label.data)
        net.hidden = net.init_hidden()
        pred = net(sent)
        guessed.extend(pred.data.max(1)[1])
        loss_sum += loss_function(pred, label).data[0]
    mean_loss = loss_sum / len(data)
    acc = get_accuracy(gold, guessed)
    print(name + ': loss %.2f acc %.1f' % (mean_loss, acc*100))
    return acc
def load_sst(text_field, label_field, batch_size, Difficulty, cuda):
    """
    Load the train/dev/test TSV splits for a difficulty level and build iterators.

    Changes the working directory to the dataset's base folder, reads the
    three TSV files, builds the text and label vocabularies from all three
    splits, and wraps the splits in BucketIterators.

    Args:
        text_field: torchtext field used for the 'text' column.
        label_field: torchtext field used for the 'label' column.
        batch_size: Batch size of the training iterator; dev and test are
            each served as one single batch.
        Difficulty: One of 'easy', 'medium' or 'hard'.
        cuda: Truthy to pass device=None to the iterators, otherwise
            device=-1 is passed.

    Returns:
        (train_iter, dev_iter, test_iter)

    Raises:
        ValueError: If Difficulty is not a known level (the original fell
            through and failed later on an unbound local).
    """
    # difficulty -> (working directory, dataset path, train, validation, test)
    sources = {
        'easy': (r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-sentiment-classification-master copy/data',
                 './SST2/', 'train.tsv', 'dev.tsv', 'test.tsv'),
        # NOTE(review): 'medium' validates on test.tsv, as in the original.
        'medium': (r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-sentiment-classification-master',
                   './data/Dataset/', 'train.tsv', 'test.tsv', 'test.tsv'),
        'hard': (r'C:\Users\john\Desktop\Deep_Learning_A_Z',
                 './aclImdb/', 'usable_train.tsv', 'usable_dev.tsv', 'usable_test.tsv'),
    }
    if Difficulty not in sources:
        raise ValueError('unknown Difficulty: {!r}'.format(Difficulty))
    workdir, path, train_file, dev_file, test_file = sources[Difficulty]

    os.chdir(workdir)
    train, dev, test = data.TabularDataset.splits(
        path=path, train=train_file, validation=dev_file, test=test_file,
        format='tsv', fields=[('text', text_field), ('label', label_field)])
    text_field.build_vocab(train, dev, test)
    label_field.build_vocab(train, dev, test)

    # The two original branches differed only in the device argument.
    device = None if cuda else -1
    train_iter, dev_iter, test_iter = data.BucketIterator.splits(
        (train, dev, test),
        batch_sizes=(batch_size, len(dev), len(test)),
        sort_key=lambda x: len(x.text), repeat=False, device=device)
    return train_iter, dev_iter, test_iter
#def adjust_learning_rate(learning_rate, optimizer, epoch):
# lr = learning_rate * (0.9 ** (epoch //1))
# for param_group in optimizer.param_groups:
# param_group['lr'] = lr
# return optimizer
#for clustering
#if Difficulty == 'easy':
# EPOCHS= 30
# BATCH_SIZE = 30
# HIDDEN_DIM = 150
# Per-difficulty hyper-parameters: number of epochs, training batch size and
# LSTM hidden size.
if Difficulty == 'easy':
    EPOCHS, BATCH_SIZE, HIDDEN_DIM = 1000, 5, 100
elif Difficulty == 'medium':
    EPOCHS, BATCH_SIZE, HIDDEN_DIM = 30, 5, 50
elif Difficulty == 'hard':
    EPOCHS, BATCH_SIZE, HIDDEN_DIM = 30, 5, 50
# Alternative hidden sizes tried for 'easy' and 'medium':
#HIDDEN_DIM = round((150*(4/6)))
#HIDDEN_DIM = round((150*(4/5)))
#HIDDEN_DIM = round((150*(4/8)))
# NOTE(review): USE_GPU is not read anywhere in the visible code; the `cuda`
# flag is what the rest of the script checks.
USE_GPU = torch.cuda.is_available()
# Width of each embedding row.
EMBEDDING_DIM = 300
# Used to name this run's output directory.
timestamp = str(int(time.time()))
best_dev_acc = 0.0
#B_sizes = np.ones(BATCH_SIZE) #BATCH SIZE MUST BE MINIMUM OF 50
#for x in range(BATCH_SIZE):
# B_sizes[x]= 10*(.1*x*(torch.cos(torch.FloatTensor([x]))*torch.sin(torch.FloatTensor([2*x])))+.08*x).round() + 1
#B_sizes = max(B_sizes)
#def weights_init(LSTMSentiment):
# classname = LSTMSentiment.__class__.__name__
# if classname.find('LSTMSentiment') != -1:
# LSTMSentiment.weight.data.fill_(.5)
# LSTMSentiment.bias.data.fill_(0)
#Using = BiLSTMSentiment
# Model class to instantiate.
Using = LSTMSentiment
if cuda:
    # Make newly created float tensors CUDA tensors by default.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
print('Using: ', Using)
text_field = data.Field(lower=True)
label_field = data.Field(sequential=False)
train_iter, dev_iter, test_iter = load_sst(text_field, label_field, BATCH_SIZE, Difficulty = Difficulty, cuda = cuda)
# NOTE(review): this module-level count is not read in the visible code.
count = 0
# label_size is one less than the label vocabulary size; the train/evaluate
# functions shift the label indices down by one to match.
model = Using(embedding_dim=EMBEDDING_DIM, hidden_dim=HIDDEN_DIM, vocab_size=len(text_field.vocab),
              use_gpu=cuda, label_size=len(label_field.vocab)-1, batch_size=BATCH_SIZE)
if cuda:
    model = model.cuda()
# From here on `model` is the DataParallel wrapper; the network itself is
# reached through model.module.
model=torch.nn.DataParallel(model, device_ids=[0])
print('Load word embeddings...')
# # glove
#text_field.vocab.load_vectors('glove.6B.100d')
#text_field.vocab.load_vectors('hard_dataset_embeddings')
# word2vector
# Build the initial embedding matrix: random rows, overwritten with the
# pretrained word2vec vector for every vocabulary word that has one.
word_to_idx = text_field.vocab.stoi
pretrained_embeddings = np.random.uniform(-0.25, 0.25, (len(text_field.vocab), EMBEDDING_DIM))
pretrained_embeddings[0] = 0
#if Difficulty == 'easy':
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-sentiment-classification-master')
word2vec = load_bin_vec('./data/GoogleNews-vectors-negative300.bin', word_to_idx)
#if Difficulty == 'hard':
# os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z')
# word2vec = load_bin_vec('hard_dataset_embeddings', word_to_idx)
for word, vector in word2vec.items():
    # Row i of the matrix belongs to vocabulary index i. The original wrote
    # to row word_to_idx[word]-1, which stores every vector one row too early
    # (assuming the model looks tokens up by their vocab index -- confirm
    # against LSTMSentiment).
    pretrained_embeddings[word_to_idx[word]] = vector
# text_field.vocab.load_vectors(wv_type='', wv_dim=300)
model.module.embeddings.weight.data.copy_(torch.from_numpy(pretrained_embeddings))
# model.embeddings.weight.data = text_field.vocab.vectors
# model.embeddings.embed.weight.requires_grad = False
best_model = model
# Weights of the best epoch so far. `best_model = model` on its own is only
# an alias of the live model, so without this snapshot the "best" model would
# always be the latest one.
best_state = None
optimizer = optim.Adam(model.parameters(), lr=1e-3)
#optimizer = optim.Adam(model.parameters(), lr=1e-2,weight_decay=1e-5)
loss_function = nn.NLLLoss()
#loss_function = nn.CrossEntropyLoss()
print('Training...')
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
time_start = time.time()
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
best_model_path = os.path.join(out_dir, 'best_model' + '.pth')
for epoch in range(EPOCHS):
    # batch_modifer = epoch + 1
    avg_loss, acc = train_epoch_progress(model, train_iter, loss_function, optimizer, text_field, label_field, epoch)
    tqdm.write('Train: loss %.2f acc %.1f' % (avg_loss, acc*100))
    dev_acc = evaluate(model, dev_iter, loss_function, 'Dev')
    if dev_acc > best_dev_acc:
        # Portable replacement for os.system('rm ...'); saving is currently
        # commented out below, so the file normally does not exist.
        if best_dev_acc > 0 and os.path.exists(best_model_path):
            os.remove(best_model_path)
        best_dev_acc = dev_acc
        best_state = {key: value.clone() for key, value in model.state_dict().items()}
        # torch.save(best_model.state_dict(), out_dir + '/best_model' + '.pth')
        # evaluate on test with the best dev performance model
        # (at this point the live model IS the best model)
        test_acc = evaluate(best_model, test_iter, loss_function, 'Test')
# Final dev score of the last-epoch model, taken before restoring weights.
dev_acc = evaluate(model, dev_iter, loss_function, 'Dev')
if best_state is not None:
    model.load_state_dict(best_state)
test_acc = evaluate(best_model, test_iter, loss_function, 'Final Test')
time_finish = time.time()
# Elapsed wall-clock time in hours.
total_time = (time_finish-time_start)/3600
print('total time', total_time, 'hours')
# Write dev accuracy, test accuracy and run time (hours) to the first unused
# out_<n>.csv file in the output directory.
os.chdir(r'C:\Users\john\Desktop\Deep_Learning_A_Z\pytorch-sentiment-classification-master/output/')
ID = next((x for x in range(1001) if not os.path.isfile('out_'+str(x)+'.csv')), None)
if ID is None:
    # The original left ID unbound here and failed with a NameError.
    raise RuntimeError('no free out_<n>.csv slot in range 0..1000')
# np.savetxt returns None, so wrapping it in eval(str(...)) did nothing.
np.savetxt('out_'+str(ID)+'.csv', np.array([dev_acc,test_acc,total_time]), delimiter=',')
#eval("np.savetxt('out_pm_%i.csv', np.array([dev_acc,test_acc,total_time]), delimiter=',')" %id)