Here is the code. I feel like the error might be caused when I say layers+=[RowLSTM()], since this has not been pushed to GPU yet? Sorry for the length but the error could be anywhere so here it all is.
import torch.nn.init as init
import torch
__all__ = [
'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
'vgg19_bn', 'vgg19',
]
class VGG(nn.Module):
'''
VGG model
'''
def __init__(self, features): # features represents the layers array
super(VGG, self).__init__()
self.features = features
self.classifier = nn.Sequential(
nn.Dropout(),
nn.Linear(512,512),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(512, 512),
nn.ReLU(True),
nn.Linear(512, 10),
)
# Initialize weights
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
m.bias.data.zero_()
def forward(self, x): # x is the image, we run x through the layers
print(x.size())
x = self.features(x) # runs through all features, where each feature is a function
x = x.view(x.size(0), -1)
# after running through features, does sequential steps to finally classify
x = self.classifier(x)
# print(x)
return x
def make_layers(cfg, batch_norm=False):
# print("Making layers!")
layers = []
in_channels = 3
for v in cfg:
if v == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
else:
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
layers+=[RLSTM()]
return nn.Sequential(*layers)
class RLSTM(nn.Module):
def __init__(self):
super(RLSTM,self).__init__()
def forward(self, image):
print("going in rowlstm")
global current
global _layer
global isgates
size = image.size()
b = size[0]
indvs = list(image.split(1,0)) # split up the batch into individual images
#print(indvs[0].size())
tensor_array = []
for i in range(b):
current = 0
_layer = []
isgates = []
tensor_array.append(self.RowLSTM(indvs[i]))
seq=tuple(tensor_array)
trans = torch.cat(seq,0)
return trans.cuda() # trying to make floattensor error go away
def RowLSTM(self, image):
global current
global _layer
global isgates
# input-to-state (K_is * x_i) : 3x1 convolution. generate 4h x n x n tensor. 4hxnxn tensor contains all i -> s info
# the input to state convolution should only be computed one time
if current==0:
n = image.size()[2]
ch=image.size()[1]
input_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1))
isgates = self.splitIS(input_to_state(image)) # convolve, then split into gates (4 per row)
cell=RowLSTMCell(0,torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1))
# now have dummy, learnable variables for first row
_layer.append(cell)
else:
Cell_prev = _layer[current-1] # access previous row
hidPrev = Cell_prev.getHiddenState()
ch = image.size()[1]
# print("about to apply conv1d")
state_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1)) # error is here: hidPrev is an array - not a valid number of input channel
# print("applied conv1d")
prevHid=Cell_prev.getHiddenState()
ssgates = self.splitSS(state_to_state(prevHid.unsqueeze(0))) #need to unsqueeze (Ex: currently 16x5, need to make 1x16x5)
gates = self.addGates(isgates,ssgates,current)
# split gates
ig, og, fg, gg = gates[0], gates[1], gates[2], gates[3] # into four, ADD SIGMOID!
cell = RowLSTMCell(Cell_prev,ig,og,fg,gg,0,0)
cell.compute()
_layer.append(cell)
# attempting to eliminate requirement of getting size
#print(current)
try:
current+=1
y=(isgates[0][0][1][current])
return self.RowLSTM(image)
except Exception as error:
concats=[]
for cell in _layer:
tensor=torch.unsqueeze(cell.h,0)
concats.append(tensor)
seq=tuple(concats)
tensor=torch.cat(seq,3)
return tensor
def splitIS(tensor): #always going to be splitting into 4 pieces, so no need to add extra parameters
inputStateGates={}
size=tensor.size() # 1 x 4h x n x n
out_ft=size[1] # get 4h for the nxnx4h tensor
num=size[2] # get n for the nxn image
hh=out_ft/4 # we want to split the tensor into 4, for the gates
tensor = torch.squeeze(tensor) # 4h x n x n
# First, split by row: Creates n tensors of 4h x n x 1
rows = list(tensor.split(1,2))
for i in range(num):
# Each row is a tensor of 4h x n x 1, split it into 4 of h x n x 1
row=rows[i]
inputStateGates[i]=list(row.split(hh,0))
return inputStateGates
def splitSS(tensor): # 1 x 4h x n x 1, create 4 of 1 x h x n x 1
size=tensor.size()
out_ft=size[1] # get 4h for the 1x4hxn tensor
num=size[2] # get n for the 1xhxn row
hh=out_ft/4 # we want to split the tensor into 4, for the gates
tensor = tensor.squeeze(0) # 4h x n x 1
splitted=list(tensor.split(hh,0))
return splitted
def addGates(i2s,s2s,key):
""" these dictionaries are of form {key : [[i], [o], [f], [g]]}
we want to add pairwise elemeents """
# i2s is of form key: [[i], [o], [f], [g]] where each gate is hxn
# s2s is of form [[h,n],[h,n],[h,n], [h,n]]
gateSum = []
for i in range(4): # always of length 4, representing the gates
gateSum.append(torch.sigmoid(i2s[key][i] + s2s[i]))
return gateSum
cfg = {
'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M',
512, 512, 512, 512, 'M'],
}