How would PyTorch devs recommend initializing the weights of an LSTM class?
For my application, I am implementing Figure 2b in https://arxiv.org/pdf/1511.04119.pdf, and I am initializing the LSTM weights from a normal distribution with zero mean and unit variance. I find that using named_parameters
to initialize the weights seems to trigger some sort of unbounded recursion in the backend, which quickly leads to a stack overflow:
  line 372, in named_parameters
    for name, p in module.named_parameters(memo, submodule_prefix):
  File "/home/lex/anaconda2/envs/py27/lib/python2.7/site-packages/torch/nn/modules/module.py", line 370, in named_parameters
    for mname, module in self.named_children():
  File "/home/lex/anaconda2/envs/py27/lib/python2.7/site-packages/torch/nn/modules/module.py", line 390, in named_children
    for name, module in self._modules.items():
  File "/home/lex/anaconda2/envs/py27/lib/python2.7/collections.py", line 127, in items
    return [(key, self[key]) for key in self]
RuntimeError: maximum recursion depth exceeded while calling a Python object
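For comparison, this is the bare pattern I am trying to apply, shown here on a standalone nn.LSTM (a minimal sketch with made-up sizes; newer PyTorch spells the initializer init.normal_):

import torch.nn as nn
from torch.nn import init

lstm = nn.LSTM(input_size=2048, hidden_size=1024, num_layers=1, bias=False)

# draw every weight from N(0, 1)
for name, weights in lstm.named_parameters():
    init.normal(weights, 0, 1)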
Here is the offending code:
import torch
import torch.nn as nn
from torch.nn import init

inLSTM1 = torch.mul(lt[0], feat_cube[0]).view(1, 1, -1)  # will have 2048 connections
regress = RecurrentModel(inputSize=inLSTM1.size(2), nHidden=[inLSTM1.size(2), 1024, 64*32],
                         noutputs=64*32, batchSize=args.cbatchSize,
                         ship2gpu=args.ship2gpu, numLayers=1)

# use normal initialization for the regression layers
for name, weights in regress.named_parameters():
    init.normal(weights, 0, 1)

y1, l3in = regress(inLSTM1)
inLSTM3 = torch.mul(l3in[0], feat_cube[2].view(1, 1, -1))

# swap in a reshaped last lstm layer and re-initialize it
regress.lstm3 = nn.LSTM(1024, 64*64, 1, bias=False, batch_first=False, dropout=0.3)
for name, weights in regress.lstm3.named_parameters():
    init.normal(weights, 0, 1)

y3, l2in = regress(inLSTM3)
inLSTM2 = torch.mul(l2in[0], feat_cube[1].view(1, 1, -1))

# fix layers 1, 3, 4, 5, 6, 7 | layers 0 and 2 have unique shapes
regress.lstm1 = nn.LSTM(64*64, 64*64, 1, bias=False, batch_first=False, dropout=0.3)
regress.lstm2 = nn.LSTM(64*64, 64*16, 1, bias=False, batch_first=False, dropout=0.3)
regress.lstm3 = nn.LSTM(64*16, 64*64, 1, bias=False, batch_first=False, dropout=0.3)
for name, weights in regress.named_parameters():
    init.normal(weights, 0, 1)

y2, l4in = regress(inLSTM2)
Doing this instead:

for m in regress.modules():
    if isinstance(m, nn.LSTM):
        # for a single bias-free layer the state_dict holds
        # weight_ih_l0 and weight_hh_l0, in that order
        mvals = list(m.state_dict().values())
        init.normal(mvals[0], 0, 1)
        init.normal(mvals[1], 0, 1)

does not help either.
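The only other route I can see is the per-layer weight attributes that nn.LSTM exposes directly, e.g. weight_ih_l0 and weight_hh_l0, collected in all_weights (a minimal sketch of what I mean; I have not verified that this sidesteps the recursion):

for m in regress.modules():
    if isinstance(m, nn.LSTM):
        # all_weights holds one list of parameters per layer
        for layer_weights in m.all_weights:
            for w in layer_weights:
                init.normal(w, 0, 1)

Is there a recommended way to do this?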