Hi,
I tried to make a CNN network for document classification. I used Keras previously.
So I am new to PyTorch and this indexing part is a pretty confusing part.
I consider 150 words/documents. I made my word to index dictionary and convert each word in the documents to the index.
So as a training sample I have the following tensor:
Sample input size: torch.Size([1, 150])
Sample input:
tensor([[1685, 190, 5459, 727, 1295, 772, 5460, 102, 4425, 9076, 935, 7,
1200, 9077, 25, 83, 498, 830, 2169, 7, 4426, 27, 533, 1296,
199, 167, 433, 5461, 4427, 592, 26, 6298, 23, 34, 9078, 15,
149, 5462, 9079, 285, 128, 6299, 1201, 15, 46, 416, 190, 9080,
399, 139, 29, 3175, 900, 2170, 772, 54, 2880, 158, 482, 15,
371, 5463, 9081, 3488, 1686, 26, 9082, 5464, 22, 901, 336, 1748,
9083, 5465, 1531, 694, 134, 5466, 313, 9084, 9085, 5467, 772, 5468,
2881, 5, 3488, 26, 5463, 371, 5469, 2695, 679, 1921, 167, 9086,
2170, 520, 4428, 450, 72, 336, 6300, 521, 26, 695, 694, 1297,
46, 6301, 433, 100, 337, 33, 61, 5470, 620, 6, 3176, 9087,
2, 2326, 9088, 451, 339, 695, 935, 772, 2039, 9089, 33, 6302,
61, 60, 2696, 2, 2327, 9090, 451, 773, 2697, 15, 83, 498,
1531, 1114, 7, 34, 1922, 290]])
Sample label size: torch.Size([1])
Sample label:
tensor([1.])
I made the following network:
class CNN(nn.Module):
def __init__(self, vocab_size, output_size, embedding_dim, prob_drop):
super(CNN, self).__init__()
#Arguments"
filter_sizes = [1,2,3]
num_filters = 36
self.vocab_size = vocab_size
self.output_size = output_size
self.embedding = nn.EmbeddingBag(vocab_size, embedding_dim, sparse=True)
self.conv1 = nn.Conv2d(1,num_filters, (filter_sizes[0], embedding_dim))
self.conv2 = nn.Conv2d(1,num_filters, (filter_sizes[1], embedding_dim))
self.conv3 = nn.Conv2d(1,num_filters, (filter_sizes[2], embedding_dim))
# self.conv4 = nn.Conv2d(num_filters, (fiter_sizes[3], embedding_dim))
self.dropout = nn.Dropout(prob_drop)
def conv_(self, val, conv_layer):
conv_out = conv_layer(val)
activation = F.relu(conv_out.squeeze(3))# activation.size() = (batch_size, out_channels, dim1)
max_out = F.max_pool1d(activation, activation.size()[2]).squeeze(2)# maxpool_out.size() = (batch_size, out_channels)
return max_out
def forward(self, x):
x = x.long()
input_ = self.embedding(x)
input_ = input_.unsqueeze(1)
out1 = self.conv_(input_,self.conv1)
out2 = self.conv_(input_,self.conv2)
out3 = self.conv_(input_,self.conv3)
all_out = torch.cat((out1, out2, out3), 1) (batch_size, num_kernels*out_channels)
fc_in = self.dropout(all_out)
logits = self.label(fc_in)
return logit
vocab_size = len(vocab_to_int)+1
output_size = 1
embedding_dim = 100
prob_drop =0.1
net = CNN(vocab_size, output_size, embedding_dim, prob_drop)
lr = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr = lr)
the training part for one sample is as follow:
net.train()
for e in range(epochs):
for inputs, labels_ in train_one_loader:
print(inputs.size())
# zero the parameter gradients
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
However, the input size that I receive from embedding in training is [1,100] instead of [1,150,100] and it causes the error.
I am guessing that I miss one step in my training loop, but I can not figure it out.
Would you please help me to solve this problem?