Hi, ia m trying to run this tutorial code https://github.com/tuhinsharma121/federated-ml/blob/master/notebooks/network-threat-detection-using-federated-learning.ipynb on my data
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import syft as sy
import pandas as pd
colnames =[‘A’,‘B’,‘C’,‘D’]
df= pd.read_csv(“Z:/auction/python/PCA/data/train1.csv”, names=colnames+[‘Target’])[:7195]
df=df.drop(df.index[0])
df.head()
import plotly.graph_objects as go
from collections import Counter
threat_count_dict = Counter(df[‘Target’])
threat_types = list(threat_count_dict.keys())
threat_counts = [threat_count_dict[Target] for Target in threat_types]
print("Total distinct number of threat types : ",len(threat_types))
fig = go.Figure([go.Bar(x=threat_types, y=threat_counts,text=threat_counts,textposition=‘auto’)])
#fig.show()
numerical_colnames = [‘A’,‘B’,‘C’,‘D’]
final_df = df[numerical_colnames].copy()
Lets remove the numerical columns with constant value
#numerical_df = numerical_df.loc[:, (numerical_df != numerical_df.iloc[0]).any()]
lets scale the values for each column from [0,1]
N.B. we dont have any negative values]
#final_df = final_df/final_df.max()
X = final_df.values
final dataframe has 33 features
print(“Shape of feature matrix”,X.shape)
#Construct the target vector¶
#from sklearn.preprocessing import LabelEncoder
y = df[‘Target’].values
#encoder = LabelEncoder()
use LabelEncoder to encode the threat types in numeric values
#y = encoder.fit_transform(threat_types)
print("Shape of target vector : ",y.shape)
#Train/Test Split¶
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.4, random_state=42)
print("Number of records in training data : ", X_train.shape[0])
print("Number of records in test data : ", X_test.shape[0])
print("Total distinct number of threat types in training data : ",len(set(y_train)))
print("Total distinct number of threat types in test data : ",len(set(y_test)))
#Lets set up the environment for federated learning¶
#%%capture
import torch
import syft as sy
Hook PyTorch ie add extra functionalities to support Federated Learning
hook = sy.TorchHook(torch)
Sets the seed for generating random numbers.
torch.manual_seed(1)
Select CPU computation, in case you want GPU use “cuda” instead
device = torch.device(“cpu”)
Data will be distributed among these VirtualWorkers.
Remote training of the model will happen here.
gatway1 = sy.VirtualWorker(hook, id=“gatway1”)
gatway2 = sy.VirtualWorker(hook, id=“gatway2”)
#Lets set the training params¶
import numpy as np
Number of times we want to iterate over whole training data
BATCH_SIZE = 20
EPOCHS = 2
LOG_INTERVAL = 5
lr = 0.01
n_feature = X_train.shape[1]
n_class = np.unique(y_train).shape[0]
print("Number of training features : ",n_feature)
print("Number of training classes : ",n_class)
X_train = np.vstack(X_train[:, :]).astype(np.float)
X_test = np.vstack(X_test[:, :]).astype(np.float)
y_train = np.vstack(y_train[:]).astype(np.float)
y_test = np.vstack(y_test[:]).astype(np.float)
train_inputs = torch.tensor(X_train,dtype=torch.float).tag(“#iot”, “#network”,“data”,“#train”)
train_labels = torch.tensor(y_train).tag(“#iot”, “#network”,“#target”,“#train”)
test_inputs = torch.tensor(X_test,dtype=torch.float).tag(“#iot”, “#network”,“#target”,“#train”)
test_labels = torch.tensor(y_test).tag(“#iot”, “#network”,“#target”,“#train”)
Send the training and test data to the gatways in equal proportion.
train_idx = int(len(train_labels)/2)
test_idx = int(len(test_labels)/2)
gatway1_train_dataset = sy.BaseDataset(train_inputs[:train_idx], train_labels[:train_idx]).send(gatway1)
gatway2_train_dataset = sy.BaseDataset(train_inputs[train_idx:], train_labels[train_idx:]).send(gatway2)
gatway1_test_dataset = sy.BaseDataset(test_inputs[:test_idx], test_labels[:test_idx]).send(gatway1)
gatway2_test_dataset = sy.BaseDataset(test_inputs[test_idx:], test_labels[test_idx:]).send(gatway2)
Create federated datasets, an extension of Pytorch TensorDataset class
federated_train_dataset = sy.FederatedDataset([gatway1_train_dataset, gatway2_train_dataset])
federated_test_dataset = sy.FederatedDataset([gatway1_test_dataset, gatway2_test_dataset])
Create federated dataloaders, an extension of Pytorch DataLoader class
federated_train_loader = sy.FederatedDataLoader(federated_train_dataset, shuffle=True, batch_size=BATCH_SIZE)
federated_test_loader = sy.FederatedDataLoader(federated_test_dataset, shuffle=False, batch_size=BATCH_SIZE)
############################__________________________________________________________________________________________________
#Lets define a simple Logistic Regression Model in Pytorch
import torch.nn as nn
class Net(nn.Module):
def __init__(self, input_dim, output_dim): """ input_dim: number of input features. output_dim: number of labels. """ super(Net, self).__init__() self.linear = torch.nn.Linear(input_dim, output_dim) def forward(self, x): outputs = self.linear(x) return outputs
import torch.nn.functional as F
def train(model, device, federated_train_loader, optimizer, epoch):
model.train() # Iterate through each gateway's dataset for idx, (data, tar) in enumerate(federated_train_loader): batch_idx = idx+1 # Send the model to the right gateway model.send(data.location) # Move the data and target labels to the device (cpu/gpu) for computation #tar = tar.unsqueeze(-1) # -1 stands for last here equivalent to 1 data, tar = data.to(device), tar.to(device) #tar = tar.unsqueeze(1) # -1 stands for last here equivalent to 1 # Clear previous gradients (if they exist) optimizer.zero_grad() # Make a prediction output = model(data) loss = F.binary_cross_entropy(output,tar) # Calculate the gradients loss.backward() # Update the model weights optimizer.step() # Get the model back from the gateway model.get() if batch_idx==len(federated_train_loader) or (batch_idx!=0 and batch_idx % LOG_INTERVAL == 0): # get the loss back loss = loss.get() print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( epoch, batch_idx * BATCH_SIZE, len(federated_train_loader) * BATCH_SIZE, 100. * batch_idx / len(federated_train_loader), loss.item()))
#Lets define the validation process¶
import torch.optim as optim
Initialize the model
model = Net(n_feature,n_class)
#Initialize the SGD optimizer
optimizer = optim.SGD(model.parameters(), lr=lr)
for epoch in range(1, EPOCHS + 1):
# Train on the training data in a federated way train(model, device, federated_train_loader, optimizer, epoch) # Check the test accuracy on unseen test data in a federated way
I got this error
RuntimeError: 1D target tensor expected, multi-target not supported
so i changed
loss = F.cross_entropy(output,tar) to
loss = F.binary_cross_entropy(output,tar) as i have binary classification data
Now i m getting this error
UserWarning:
Using a target size (torch.Size([20, 1, 1])) that is different to the input size (torch.Size([20, 2])) is deprecated. Please ensure they have the same size.
in binary_cross_entropy
“!= input nelement ({})”.format(target.numel(), input.numel()))
ValueError: Target and input must have the same number of elements. target nelement (20) != input nelement (40)