What error do you get?
Segmentation fault (コアダンプ)
Could you post a code snippet which creates this error?
I’m not sure if it’s related to the parameter/buffer issue or might be something else.
#!/usr/bin/env python
# coding: utf-8
from torchvision import datasets, transforms, models
import torch
from torch import nn, optim, utils, device as device_, cuda
import numpy as np
# Architecture Hyper-Parameters
# Length of every weight vector and buffer row created in Model; also the
# output length of the first fully connected stage (fc).
NUM_INPUT = 28
# Number of iterations of the `for time in range(TIME_STEPS)` loop in the training script.
TIME_STEPS = 28
# Length of the target vector and of the fc2 output.
NUM_CLASS = 10
# Number of rows in each ParameterList / buffer; loop bound in q_sel and mux.
NUM_HIDDEN = 28
# batch_size passed to the DataLoader.
BATCH_SIZE = 64
# Number of passes of the outer training loop.
EPOCH = 64
# lr and weight_decay passed to optim.SGD.
LEARNING_RATE = 0.01
WEIGHT_DECAY = 0.01
def one_hot_embedding(y, length):
    """Build a zero vector of ``length`` entries and set the entries at ``y`` to 1.0.

    NOTE: as posted, this function has no ``return`` statement, so the caller
    receives ``None``.  That omission is one of the issues pointed out in the
    reply further down the thread; it is kept here so the quoted code still
    matches the discussion.

    Args:
        y: Index (or tensor of indices) of the entries to set to 1.0.
        length: Number of entries in the vector.
    """
    out = torch.zeros(length)
    out[y] = 1.0
def q_sel(self):
    """Recompute every row of ``self.sel`` from the weights, input and state.

    For each index ``i`` below ``NUM_HIDDEN`` the row is set in place to
    ``sigmoid(w_x[i] * x[i] + w_h[i] * h[i])``.

    Args:
        self: Object exposing ``sel``, ``w_x``, ``x``, ``w_h`` and ``h``,
            each indexable by row.
    """
    for index in range(NUM_HIDDEN):
        gate_input = self.w_x[index] * self.x[index] + self.w_h[index] * self.h[index]
        self.sel[index] = torch.sigmoid(gate_input)
def mux(self):
    """Blend each state row with its input row, weighted by the selector.

    Row ``i`` of ``self.h`` is overwritten in place with
    ``sel[i] * h[i] + (1 - sel[i]) * x[i]`` for every ``i`` below ``NUM_HIDDEN``.

    Args:
        self: Object exposing ``sel``, ``h`` and ``x``, each indexable by row.
    """
    for index in range(NUM_HIDDEN):
        self.h[index] = (
            self.sel[index] * self.h[index]
            + (1.0 - self.sel[index]) * self.x[index]
        )
def q_layer(self):
    """Run one gate update: refresh the selector (q_sel), then mix state and input (mux)."""
    q_sel(self)
    mux(self)
def fw_prop(self):
    """Forward-propagate through the gated layer; currently just delegates to q_layer."""
    q_layer(self)
def fc(num_index_a, num_index_b, w, b, x):
    """Column-wise weighted sum of a 2-D input followed by a sigmoid.

    For each output index ``j`` the pre-activation is
    ``b[j] + sum_i w[i][j] * x[i][j]`` with ``i`` running over ``num_index_a``.

    NOTE: the accumulation uses an in-place ``+=`` and the output is created
    without a ``device`` argument.  Both are issues addressed in the reply
    further down the thread; they are kept here so the quoted code still
    matches the discussion.

    Args:
        num_index_a: Number of rows of ``w`` / ``x`` to accumulate over.
        num_index_b: Number of output entries.
        w: Weights, indexable as ``w[i][j]``.
        b: Biases, indexable as ``b[j]``.
        x: Input, indexable as ``x[i][j]``.

    Returns:
        Tensor of ``num_index_b`` sigmoid activations.
    """
    out = torch.zeros(num_index_b)
    for index_b in range(num_index_b):
        out[index_b] = b[index_b]
        for index_a in range(num_index_a):
            out[index_b] += w[index_a][index_b] * x[index_a][index_b]
    out = torch.sigmoid(out)
    return out
def fc2(num_index_a, num_index_b, w, b, x):
    """Weighted sum of a 1-D input per output index, without an activation.

    For each output index ``j`` the value computed is
    ``b[j] + sum_i w[i][j] * x[i]`` with ``i`` running over ``num_index_a``.

    NOTE: as posted, this function has no ``return`` statement, so the caller
    receives ``None``; it also accumulates with an in-place ``+=``.  Both are
    issues pointed out in the reply further down the thread and are kept here
    so the quoted code still matches the discussion.

    Args:
        num_index_a: Number of input entries to accumulate over.
        num_index_b: Number of output entries.
        w: Weights, indexable as ``w[i][j]``.
        b: Biases, indexable as ``b[j]``.
        x: Input, indexable as ``x[i]``.
    """
    out = torch.zeros(num_index_b)
    for index_b in range(num_index_b):
        out[index_b] = b[index_b]
        for index_a in range(num_index_a):
            out[index_b] += w[index_a][index_b] * x[index_a]
# MNIST training split stored under ~/mnist (download=True lets torchvision
# fetch it); every image is converted with ToTensor().
dataset_train = datasets.MNIST(
    '~/mnist',
    download=True,
    train=True,
    transform=transforms.ToTensor(),
)
# Shuffled mini-batches of BATCH_SIZE samples, loaded with 4 workers.
dataloader_train = utils.data.DataLoader(
    dataset_train,
    num_workers=4,
    shuffle=True,
    batch_size=BATCH_SIZE,
)
class Model(nn.Module):
    """Gated layer followed by two hand-written fully connected stages.

    Holds ``NUM_HIDDEN`` weight vectors of length ``NUM_INPUT`` for the gate
    (``w_x``, ``w_h``), three ``(NUM_HIDDEN, NUM_INPUT)`` buffers (``sel``,
    ``x``, ``h``) and the weights/biases of the two fully connected stages.
    The caller writes the input into the ``x`` / ``h`` buffers before calling
    the model; ``forward`` itself takes no arguments.

    NOTE: as posted, the buffers are built by stacking ``nn.Parameter``
    objects; the corrected version later in the thread stacks plain tensors.
    """

    def __init__(self):
        super(Model, self).__init__()
        # Gate-Weight
        self.w_x = nn.ParameterList([nn.Parameter(torch.randn(NUM_INPUT)) for _ in range(NUM_HIDDEN)])
        self.w_h = nn.ParameterList([nn.Parameter(torch.randn(NUM_INPUT)) for _ in range(NUM_HIDDEN)])
        # Gate-Selector
        self.register_buffer('sel', torch.stack([nn.Parameter(torch.zeros(NUM_INPUT)) for _ in range(NUM_HIDDEN)]))
        # Input Vector
        self.register_buffer('x', torch.stack([nn.Parameter(torch.zeros(NUM_INPUT)) for _ in range(NUM_HIDDEN)]))
        # Output Vector
        self.register_buffer('h', torch.stack([nn.Parameter(torch.zeros(NUM_INPUT)) for _ in range(NUM_HIDDEN)]))
        # First fully connected stage: NUM_HIDDEN weight rows of length NUM_INPUT.
        self.fc_w1 = nn.ParameterList([nn.Parameter(torch.randn(NUM_INPUT)) for _ in range(NUM_HIDDEN)])
        self.fc_b1 = nn.ParameterList([nn.Parameter(torch.randn(1)) for _ in range(NUM_HIDDEN)])
        # Second fully connected stage: NUM_INPUT weight rows of length NUM_CLASS.
        self.fc_w2 = nn.ParameterList([nn.Parameter(torch.randn(NUM_CLASS)) for _ in range(NUM_INPUT)])
        self.fc_b2 = nn.ParameterList([nn.Parameter(torch.randn(1)) for _ in range(NUM_INPUT)])

    def forward(self):
        """Update ``h`` through the gate, then apply ``fc`` and ``fc2``.

        Returns:
            Whatever ``fc2`` returns for the ``fc`` output.
        """
        fw_prop(self)
        out1 = fc(NUM_HIDDEN, NUM_INPUT, self.fc_w1, self.fc_b1, self.h)
        out2 = fc2(NUM_INPUT, NUM_CLASS, self.fc_w2, self.fc_b2, out1)
        return out2
# Build the model and move its parameters and buffers to the GPU.
model = Model()
model.cuda()
# Plain SGD with weight decay over all model parameters.
optimizer = optim.SGD(list(model.parameters()), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
criterion = nn.BCEWithLogitsLoss()
print(model.parameters())
# Sanity check: report every parameter that did not end up on the GPU.
for name, param in model.named_parameters():
    if param.device.type != 'cuda':
        print('param {}, not on GPU'.format(name))
model.train()
# Running list of per-step losses and a counter used in the progress print-out.
losses = []
count = 0
# Training loop.
# NOTE(review): the indentation of this snippet was lost when it was pasted,
# so the nesting of the statements below cannot be read off the text; the
# comments describe what each statement does, not which loop it belongs to.
for epoch in range(EPOCH):
for x, t in dataloader_train:
# Target built from the batch labels (as posted, one_hot_embedding has no
# return statement, so y is None here).
y = one_hot_embedding(t, NUM_CLASS)
for time in range(TIME_STEPS):
# Shift the rows of the x and h buffers by one position towards higher
# indices, starting from the last row.
for index in range(NUM_HIDDEN-1, 0, -1):
model.x[index] = model.x[index - 1]
model.h[index] = model.h[index - 1]
# Row 0 receives x[0][0][time] (presumably row `time` of the first image in
# the batch - confirm) and a zeroed state row.  These two
# torch.cuda.FloatTensor(...) calls are the lines the reply below replaces
# to get rid of the seg fault.
model.x[0] = torch.cuda.FloatTensor(x[0][0][time])
model.h[0] = torch.cuda.FloatTensor(torch.zeros(NUM_INPUT))
# One optimisation step: clear gradients, forward, loss, backward, update.
model.zero_grad()
out = model()
loss = criterion(out, y)
loss.backward(retain_graph = True)
optimizer.step()
# Keep the loss value on the CPU for the running mean printed below.
losses.append(loss.cpu().data)
# epoch_loss is assigned but not read anywhere in this snippet.
epoch_loss = np.array([np.mean(losses)])
print("{:6d}: Epoch:{} Loss:{:.9f}".format(count, epoch, np.mean(losses)))
count += 1
There are still several issues in the code:
- `one_hot_embedding` and `fc2` do not return anything, so you might want to add `return out`
- use
model.x[0] = x[0][0][time].cuda()
model.h[0] = torch.zeros(NUM_INPUT).cuda()
to get rid of the seg fault. We’ll look into what’s happening.
- if you are initializing a tensor in your forward pass, stick to @spanev’s suggestion and pass the device argument, e.g. for
out = torch.zeros(num_index_b, device=x.device)
in `fc`
- once this is fixed, you’ll run into
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
- change the in-place addition in `fc` and `fc2` to `out[index_b] = out[index_b] + ...` and wrap the manipulation of `self.sel` in `q_sel` in a `torch.no_grad()` block, otherwise `self.sel` will require gradients after the operation
This code should work:
from torchvision import datasets, transforms, models
import torch
from torch import nn, optim, utils, device as device_, cuda
import numpy as np
# Architecture Hyper-Parameters
# Length of every weight vector and buffer row created in Model; also the
# output length of the first fully connected stage (fc).
NUM_INPUT = 28
# Number of iterations of the `for time in range(TIME_STEPS)` loop in the training script.
TIME_STEPS = 28
# Length of the target vector and of the fc2 output.
NUM_CLASS = 10
# Number of rows in each ParameterList / buffer; loop bound in q_sel and mux.
NUM_HIDDEN = 28
# batch_size passed to the DataLoader.
BATCH_SIZE = 64
# Number of passes of the outer training loop.
EPOCH = 64
# lr and weight_decay passed to optim.SGD.
LEARNING_RATE = 0.01
WEIGHT_DECAY = 0.01
def one_hot_embedding(y, length):
    """Return a zero vector of ``length`` entries with the entries at ``y`` set to 1.0.

    The vector is allocated on the same device as ``y``.

    Args:
        y: Tensor holding the index (or indices) of the entries to set to 1.0.
        length: Number of entries in the returned vector.

    Returns:
        1-D float tensor of ``length`` entries on ``y.device``.
    """
    out = torch.zeros(length, device=y.device)
    out[y] = 1.0
    return out
def q_sel(self):
    """Recompute every row of ``self.sel`` from the weights, input and state.

    For each index ``i`` below ``NUM_HIDDEN`` the row is set in place to
    ``sigmoid(w_x[i] * x[i] + w_h[i] * h[i])``.  The update runs under
    ``torch.no_grad()`` so that writing into the ``sel`` buffer does not make
    it require gradients.

    Args:
        self: Object exposing ``sel``, ``w_x``, ``x``, ``w_h`` and ``h``,
            each indexable by row.
    """
    with torch.no_grad():
        for index in range(NUM_HIDDEN):
            gate_input = self.w_x[index] * self.x[index] + self.w_h[index] * self.h[index]
            self.sel[index] = torch.sigmoid(gate_input)
def mux(self):
    """Blend each state row with its input row, weighted by the selector.

    Row ``i`` of ``self.h`` is overwritten in place with
    ``sel[i] * h[i] + (1 - sel[i]) * x[i]`` for every ``i`` below ``NUM_HIDDEN``.

    Args:
        self: Object exposing ``sel``, ``h`` and ``x``, each indexable by row.
    """
    for index in range(NUM_HIDDEN):
        self.h[index] = (
            self.sel[index] * self.h[index]
            + (1.0 - self.sel[index]) * self.x[index]
        )
def q_layer(self):
    """Run one gate update: refresh the selector (q_sel), then mix state and input (mux)."""
    q_sel(self)
    mux(self)
def fw_prop(self):
    """Forward-propagate through the gated layer; currently just delegates to q_layer."""
    q_layer(self)
def fc(num_index_a, num_index_b, w, b, x):
    """Column-wise weighted sum of a 2-D input followed by a sigmoid.

    For each output index ``j`` the pre-activation is
    ``b[j] + sum_i w[i][j] * x[i][j]`` with ``i`` running over ``num_index_a``.
    The output is allocated on ``x.device``.

    Args:
        num_index_a: Number of rows of ``w`` / ``x`` to accumulate over.
        num_index_b: Number of output entries.
        w: Weights, indexable as ``w[i][j]``.
        b: Biases, indexable as ``b[j]``.
        x: Input tensor, indexable as ``x[i][j]``.

    Returns:
        Tensor of ``num_index_b`` sigmoid activations.
    """
    out = torch.zeros(num_index_b, device=x.device)
    for index_b in range(num_index_b):
        out[index_b] = b[index_b]
        for index_a in range(num_index_a):
            # Out-of-place add (not +=) so autograd does not see an in-place
            # modification of a value it still needs.
            out[index_b] = out[index_b] + w[index_a][index_b] * x[index_a][index_b]
    out = torch.sigmoid(out)
    return out
def fc2(num_index_a, num_index_b, w, b, x):
    """Weighted sum of a 1-D input per output index, without an activation.

    For each output index ``j`` the value is
    ``b[j] + sum_i w[i][j] * x[i]`` with ``i`` running over ``num_index_a``.
    The output is allocated on ``x.device``.

    Args:
        num_index_a: Number of input entries to accumulate over.
        num_index_b: Number of output entries.
        w: Weights, indexable as ``w[i][j]``.
        b: Biases, indexable as ``b[j]``.
        x: Input tensor, indexable as ``x[i]``.

    Returns:
        Tensor of ``num_index_b`` raw (pre-activation) values.
    """
    out = torch.zeros(num_index_b, device=x.device)
    for index_b in range(num_index_b):
        out[index_b] = b[index_b]
        for index_a in range(num_index_a):
            # Out-of-place add (not +=) so autograd does not see an in-place
            # modification of a value it still needs.
            out[index_b] = out[index_b] + w[index_a][index_b] * x[index_a]
    return out
# MNIST training split read from a local directory (download=False, so the
# data must already be present there); every image is converted with ToTensor().
dataset_train = datasets.MNIST(
    '/home/ptrblck/python/data',
    download=False,
    train=True,
    transform=transforms.ToTensor(),
)
# Shuffled mini-batches of BATCH_SIZE samples, loaded with 4 workers.
dataloader_train = utils.data.DataLoader(
    dataset_train,
    num_workers=4,
    shuffle=True,
    batch_size=BATCH_SIZE,
)
class Model(nn.Module):
    """Gated layer followed by two hand-written fully connected stages.

    Holds ``NUM_HIDDEN`` weight vectors of length ``NUM_INPUT`` for the gate
    (``w_x``, ``w_h``), three zero-initialised ``(NUM_HIDDEN, NUM_INPUT)``
    buffers (``sel``, ``x``, ``h``) and the weights/biases of the two fully
    connected stages.  The caller writes the input into the ``x`` / ``h``
    buffers before calling the model; ``forward`` itself takes no arguments.
    """

    def __init__(self):
        super(Model, self).__init__()
        # Gate-Weight
        self.w_x = nn.ParameterList([nn.Parameter(torch.randn(NUM_INPUT)) for _ in range(NUM_HIDDEN)])
        self.w_h = nn.ParameterList([nn.Parameter(torch.randn(NUM_INPUT)) for _ in range(NUM_HIDDEN)])
        # Gate-Selector
        self.register_buffer('sel', torch.stack([torch.zeros(NUM_INPUT) for _ in range(NUM_HIDDEN)]))
        # Input Vector
        self.register_buffer('x', torch.stack([torch.zeros(NUM_INPUT) for _ in range(NUM_HIDDEN)]))
        # Output Vector
        self.register_buffer('h', torch.stack([torch.zeros(NUM_INPUT) for _ in range(NUM_HIDDEN)]))
        # First fully connected stage: NUM_HIDDEN weight rows of length NUM_INPUT.
        self.fc_w1 = nn.ParameterList([nn.Parameter(torch.randn(NUM_INPUT)) for _ in range(NUM_HIDDEN)])
        self.fc_b1 = nn.ParameterList([nn.Parameter(torch.randn(1)) for _ in range(NUM_HIDDEN)])
        # Second fully connected stage: NUM_INPUT weight rows of length NUM_CLASS.
        self.fc_w2 = nn.ParameterList([nn.Parameter(torch.randn(NUM_CLASS)) for _ in range(NUM_INPUT)])
        self.fc_b2 = nn.ParameterList([nn.Parameter(torch.randn(1)) for _ in range(NUM_INPUT)])

    def forward(self):
        """Update ``h`` through the gate, then apply ``fc`` and ``fc2``.

        Returns:
            The ``fc2`` output computed from the ``fc`` activations
            (``NUM_CLASS`` raw values).
        """
        fw_prop(self)
        out1 = fc(NUM_HIDDEN, NUM_INPUT, self.fc_w1, self.fc_b1, self.h)
        out2 = fc2(NUM_INPUT, NUM_CLASS, self.fc_w2, self.fc_b2, out1)
        return out2
# Build the model and move its parameters and buffers to the GPU.
model = Model()
model.cuda()
# Plain SGD with weight decay over all model parameters.
optimizer = optim.SGD(list(model.parameters()), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
criterion = nn.BCEWithLogitsLoss()
print(model.parameters())
# Sanity check: report every parameter that did not end up on the GPU.
for name, param in model.named_parameters():
    if param.device.type != 'cuda':
        print('param {}, not on GPU'.format(name))
model.train()
# Running list of per-step losses and a counter used in the progress print-out.
losses = []
count = 0
# Training loop.
# NOTE(review): the indentation of this snippet was lost when it was pasted,
# so the nesting of the statements below cannot be read off the text; the
# comments describe what each statement does, not which loop it belongs to.
for epoch in range(EPOCH):
for x, t in dataloader_train:
# Target vector built on the GPU from the batch labels.
y = one_hot_embedding(t.cuda(), NUM_CLASS)
for time in range(TIME_STEPS):
# Shift the rows of the x and h buffers by one position towards higher
# indices, starting from the last row.
for index in range(NUM_HIDDEN-1, 0, -1):
model.x[index] = model.x[index - 1]
model.h[index] = model.h[index - 1]
# Row 0 receives x[0][0][time] (presumably row `time` of the first image in
# the batch - confirm), moved to the GPU with .cuda(), and a zeroed state row.
model.x[0] = x[0][0][time].cuda()
model.h[0] = torch.zeros(NUM_INPUT).cuda()
# One optimisation step: clear gradients, forward, loss, backward, update.
model.zero_grad()
out = model()
loss = criterion(out, y)
loss.backward(retain_graph = True)
optimizer.step()
# Keep the loss value on the CPU for the running mean printed below.
losses.append(loss.cpu().data)
# epoch_loss is assigned but not read anywhere in this snippet.
epoch_loss = np.array([np.mean(losses)])
print("{:6d}: Epoch:{} Loss:{:.9f}".format(count, epoch, np.mean(losses)))
count += 1
@ptrblck -san
That was my mistake when pasting from the original code.
It works fine now.
But wait, why did the code from before this topic work on Colab?
I would also like to know how you found my coding mistakes (apart from the missing return), because I don’t want to make the same mistakes in the future.
I’ve debugged the code step by step.
Creating the tensor via cuda.FloatTensor
is not the recommended way, so I just changed it to push x[0][0][time] directly
to the device. However, this line of code should not create a segfault, so I’ve posted some debug information in the issue you created.
Once this was done, the error messages helped debugging the other issues, e.g. by enabling anomaly detection.
I see, thank you very very much for your valuable advice.