Hi everyone,
I’m quite new to Python in general and I’ve been thrown in at the deep end. I’m building a RNN classifier that has 13 feature inputs and a binary label. I noticed an issue when calculating the accuracy of my network…I was getting accuracy rates ranging from 102% to 175% which obviously isn’t right.
I’ve had a look at the shape of my data as think this is causing the issue. I’ve attempted to reshape but now I’m getting error message 'RuntimeError: shape ‘[1, 1, 13]’ is invalid for input of size 26’
Can some please point me in the direction of the mistake I’m making? I think it’s something quite obvious but being so new at this I can’t seem to work it out.
> device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
>
> # Hyper-parameters
>
> num_classes = 2
> num_epochs = 10
> batch_size = 10
> learning_rate = 0.001
> input_size = 13
> sequence_length = 1
> hidden_size = 14
> num_layers = 3
>
>
> # load-dataset
> class URLTestDataset(Dataset):
> """URL Test Dataset"""
>
> def __init__(self, csv_file):
> self.url_frame = pd.read_csv(csv_file)
>
> def __len__(self):
> return len(self.url_frame)
>
> def __getitem__(self, idx):
> if torch.is_tensor(idx):
> idx = idx.tolist()
>
> label = self.url_frame.iloc[idx, 13]
> features = self.url_frame.iloc[idx, :13]
> label = np.array([label])
> features = np.array([features])
> features = torch.from_numpy(features)
> label = torch.from_numpy(label)
> sample = (features, label)
> return sample
>
>
> train_dataset = URLTestDataset(csv_file='urls2.csv')
> test_dataset = URLTestDataset(csv_file='urls1.csv')
>
> # Data loader
> train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True)
>
> test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=2, shuffle=True)
>
>
> class RNN(nn.Module):
> def __init__(self, input_size, hidden_size, num_layers, num_classes):
> super(RNN, self).__init__()
> self.num_layers = num_layers
> self.hidden_size = hidden_size
> self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
> self.fc = nn.Linear(hidden_size, num_classes)
>
> def forward(self, x):
> h0 = torch.zeros((self.num_layers, x.size(0), self.hidden_size), dtype=torch.float).to(device)
> #c0 = torch.zeros((self.num_layers, x.size(0), self.hidden_size), dtype=torch.float).to(device)
>
> out, _ = self.rnn(x, h0)
>
> out = out[:, -1, :]
>
> out = self.fc(out)
> return out
>
>
> model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
>
>
> # Loss and optimizer
> criterion = nn.CrossEntropyLoss()
> optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
>
> # Train the model
>
> n_total_steps = len(train_loader)
>
> for epoch in range(num_epochs):
> for i, (features, label) in enumerate(train_loader):
> features = features.reshape(1, sequence_length, input_size).to(device)
>
> label = label.to(device)
> features = features.to(device)
> features = features.float()
>
> # Forward pass
>
> outputs = model(features)
> outputs = outputs.float()
>
> loss = criterion(outputs, label.squeeze())
>
> loss.backward()
> optimizer.zero_grad()
> optimizer.step()
>
> if (i + 1) % 100 == 0:
> print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{n_total_steps}], Loss: {loss.item():.4f}')
>
> # Test
> with torch.no_grad():
> n_correct = 0
> n_samples = 0
> for features, label in test_loader:
> features = features.reshape(1, sequence_length, input_size).to(device)
> features = features.float()
> label = label.to(device)
> outputs = model(features)
>
>
> _, predicted = torch.max(outputs.data, 1)
> n_samples += label.size(0)
> n_correct += (predicted == label).sum().item()
>
> acc = 100.0 * n_correct / n_samples
> print(f'Accuracy of the network on the test urls: {acc} %') ```