import torch
import torchvision
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from torch.distributions import Normal
import torchvision.transforms as transforms
torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True
# Use standard FashionMNIST dataset
train_set = torchvision.datasets.FashionMNIST(
root = './data/FashionMNIST',
train = True,
download = True,
transform = transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.ToTensor()
])
)
test_set = torchvision.datasets.FashionMNIST(
root = './data/FashionMNIST',
train = False,
download = True,
transform = transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.ToTensor()
])
)
class MLPLayer(nn.Module):
"""
Hidden Layer of our BNN
"""
def __init__(self, input_dim, output_dim, rho_prior, rho0=-6., lambda0=0.99):
# initialize layers
super().__init__()
# set input and output dimensions
self.input_dim = input_dim
self.output_dim = output_dim
# initialize mu, rho and theta parameters for layer's weights
self.w_mu = nn.Parameter(torch.Tensor(input_dim, output_dim).uniform_(-0.6, 0.6))
self.w_rho = nn.Parameter(torch.Tensor(input_dim, output_dim).uniform_(rho0, rho0))
self.theta = nn.Parameter(logit(torch.Tensor(output_dim).uniform_(lambda0, lambda0)))
# initialize mu, rho and theta parameters for layer's biases, theta = logit(phi)
self.b_mu = nn.Parameter(torch.Tensor(output_dim).uniform_(-0.6, 0.6))
self.b_rho = nn.Parameter(torch.Tensor(output_dim).uniform_(rho0, rho0))
self.rho_prior = rho_prior
# self.device = device
# initialize weight samples (these will be calculated whenever the layer makes a prediction)
self.gamma = None
self.w = None
self.b = None
# initialize log pdf of prior and vb distributions
self.kl = 0
def forward(self, X, temp, phi_prior):
"""
For one Monte Carlo sample
:param X: [batch_size, input_dim]
:return: output for one MC sample, size = [batch_size, output_dim]
"""
# sample weights and biases
sigma_w = torch.log(1 + torch.exp(self.w_rho))
sigma_b = torch.log(1 + torch.exp(self.b_rho))
sigma_prior = torch.log(1 + torch.exp(self.rho_prior)) #
self.register_buffer('u', torch.rand(self.theta.shape))
u = self.u #to(self.device)
self.gamma = gumbel_softmax(self.theta, u, temp, hard=True)
self.gamma_w = self.gamma.expand(self.input_dim, self.output_dim)
self.gamma_b = self.gamma
# epsilon_w = Normal(0, 1).sample(self.w_mu.shape)
# epsilon_b = Normal(0, 1).sample(self.b_mu.shape)
self.register_buffer('epsilon_w', Normal(0, 1).sample(self.w_mu.shape))
self.register_buffer('epsilon_b', Normal(0, 1).sample(self.b_mu.shape))
epsilon_w = self.epsilon_w #to(self.device)
epsilon_b = self.epsilon_b #to(self.device)
self.w = self.gamma_w * (self.w_mu + sigma_w * epsilon_w)
self.b = self.gamma_b * (self.b_mu + sigma_b * epsilon_b)
output = torch.mm(X, self.w) + self.b.expand(X.size()[0], self.output_dim)
# record KL at sampled weight and bias
phi = sigmoid(self.theta)
w_phi = phi.expand(self.input_dim, self.output_dim)
b_phi = phi
kl_phi = phi * (torch.log(phi) - torch.log(phi_prior)) + \
(1 - phi) * (torch.log(1 - phi) - torch.log(1 - phi_prior))
kl_w = w_phi * (torch.log(sigma_prior) - torch.log(sigma_w) +
0.5 * (sigma_w ** 2 + self.w_mu ** 2) / sigma_prior ** 2 - 0.5)
kl_b = b_phi * (torch.log(sigma_prior) - torch.log(sigma_b) +
0.5 * (sigma_b ** 2 + self.b_mu ** 2) / sigma_prior ** 2 - 0.5)
self.kl = torch.sum(kl_w) + torch.sum(kl_b) + torch.sum(kl_phi)
return output
class SFunc(nn.Module):
def __init__(self, data_dim, hidden_dim1, hidden_dim2, target_dim, temp, phi_prior1, phi_prior2, builder, sigma_noise=1):
# initialize the network using the MLP layer
super().__init__()
# self.rho_prior = torch.Tensor([np.log(np.exp(1.3) - 1)]) #.to(device)
self.register_buffer('rho_prior', torch.Tensor([np.log(np.exp(1.3) - 1)]))
# self.device = device
self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1, padding=2)
self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=2)
self.l1 = MLPLayer(data_dim, hidden_dim1, self.rho_prior) # , self.device)
self.l2 = MLPLayer(hidden_dim1, hidden_dim2, self.rho_prior) #, self.device)
self.l4 = OutLayer(hidden_dim2, target_dim, self.rho_prior) #, self.device)
self.target_dim = target_dim
# self.log_sigma_noise = torch.log(torch.Tensor([sigma_noise])) #.to(device)
self.register_buffer('log_sigma_noise', torch.log(torch.Tensor([sigma_noise])))
self.temp =temp
self.phi_prior1=phi_prior1
self.phi_prior2=phi_prior2
self.train_len= torch.tensor(len(builder))
def forward(self, X, y, temp,phi_prior1,phi_prior2 ):
"""
output of the BNN for one Monte Carlo sample
:param X: [batch_size, data_dim]
:return: [batch_size, target_dim]
"""
print("\tIn Model: input size", X.size())
output = F.relu(F.max_pool2d(self.conv1(X), 2))
output = F.relu(F.max_pool2d(self.conv2(output), 2))
output = F.relu(self.l1(output.reshape(-1, 64*8*8), temp, phi_prior1))
output = F.relu(self.l2(output, temp, phi_prior2))
output = self.l4(output)
#loss function here
return output.squeeze()
data_size = 60000
data_dim = 64*8*8
hidden_dim1 = 64*1*1
hidden_dim2 = 64*1*1
target_dim = 10
L=2
temp = torch.tensor(0.5)
phi_prior1 = torch.tensor(0.0001)
phi_prior2 = torch.tensor(0.0001)
lr = .001
batch_size =1024
epochs = 1
torch.set_default_tensor_type('torch.cuda.FloatTensor')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
train_loader = torch.utils.data.DataLoader(train_set, batch_size = batch_size,shuffle=True,num_workers=0)
test_loader = torch.utils.data.DataLoader(test_set, batch_size = batch_size,shuffle=False,num_workers=0)
net = SFunc(data_dim, hidden_dim1, hidden_dim2, target_dim, temp, phi_prior1,phi_prior2,train_loader)
if torch.cuda.device_count() > 1:
print("There are", torch.cuda.device_count(), "GPUs!")
net = torch.nn.DataParallel(net,device_ids=[0,1])
net.to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
for epoch in range(epochs):
for batch in train_loader:
images, labels = batch[0].to(device), batch[1].to(device)
print("Outside: input size", images.size())
preds = net(images, labels,temp,phi_prior1,phi_prior2)
optimizer.zero_grad()
# loss.backward()
optimizer.step()
print("\n")
total = 0
correct = 0
with torch.no_grad():
for batch in test_loader:
images, labels = batch[0].to(device), batch[1].to(device)
labels_list.append(labels)
print("Outside: input size", images.size())
outputs = net(images, labels, temp, phi_prior1, phi_prior2)
preds2 = torch.max(outputs, 1)[1]
predictions_list.append(preds2)
correct += (preds2 == labels).sum()
total += labels.size(0)
test_accuracy = correct/total
In this code I am getting an error.
Traceback (most recent call last):
File "SSIG_Fashion-MNIST-HPCC_New.py", line 488, in <module>
preds = net(images, labels,temp.to(device),phi_prior1.to(device),phi_prior2.to(device))
File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/data_parallel.py", line 157, in forward
inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/data_parallel.py", line 174, in scatter
return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/scatter_gather.py", line 44, in scatter_kwargs
inputs = scatter(inputs, target_gpus, dim) if inputs else []
File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/scatter_gather.py", line 36, in scatter
res = scatter_map(inputs)
File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/scatter_gather.py", line 23, in scatter_map
return list(zip(*map(scatter_map, obj)))
File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/scatter_gather.py", line 19, in scatter_map
return Scatter.apply(target_gpus, None, dim, obj)
File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/_functions.py", line 93, in forward
outputs = comm.scatter(input, target_gpus, chunk_sizes, ctx.dim, streams)
File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/comm.py", line 189, in scatter
return tuple(torch._C._scatter(tensor, devices, chunk_sizes, dim, streams))
RuntimeError: chunk expects at least a 1-dimensional tensor
I am not able to understand why when I pass simple tensor arguments to the wrapped model, I get the error mentioned here.