Hi I am using a time series data classification using densenet with two classes 0/1. The accuracy is 56% on test, 51% training and 48% on test for 100 epochs. following is the densenet architecture with 12 layers :
class BasicBlock(nn.Module):
def __init__(self, in_planes, out_planes, dropRate=0.0):
super(BasicBlock, self).__init__()
self.bn1 = nn.BatchNorm1d(in_planes)
self.relu = nn.SELU(inplace=True)
self.conv1 = nn.Conv1d(in_planes, out_planes, kernel_size=3, stride=1,
padding=1, bias=True)##bias=False)
self.droprate = dropRate
def forward(self, x):
out = self.conv1(self.relu(self.bn1(x)))
if self.droprate > 0:
out = F.dropout(out, p=self.droprate, training=self.training)
return torch.cat([x, out], 1)
class BottleneckBlock(nn.Module):
def __init__(self, in_planes, out_planes, dropRate=0.0):
super(BottleneckBlock, self).__init__()
inter_planes = out_planes * 4
self.bn1 = nn.BatchNorm1d(in_planes)
self.relu = nn.SELU(inplace=True)
self.conv1 = nn.Conv1d(in_planes, inter_planes, kernel_size=1, stride=1,
padding=0, bias = True)##bias=False)
self.bn2 = nn.BatchNorm1d(inter_planes)
self.conv2 = nn.Conv1d(inter_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=True)
self.droprate = dropRate
def forward(self, x):
out = self.conv1(self.relu(self.bn1(x)))
if self.droprate > 0:
out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
out = self.conv2(self.relu(self.bn2(out)))
if self.droprate > 0:
out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
return torch.cat([x, out], 1)
class TransitionBlock(nn.Module):
def __init__(self, in_planes, out_planes, dropRate=0.0):
super(TransitionBlock, self).__init__()
self.bn1 = nn.BatchNorm1d(in_planes)
##self.relu = nn.ReLU(inplace=True)
self.relu = nn.SELU(inplace=True)
self.conv1 = nn.Conv1d(in_planes, out_planes, kernel_size=1, stride=1,
padding=0, bias=True)
self.droprate = dropRate
def forward(self, x):
out = self.conv1(self.relu(self.bn1(x)))
if self.droprate > 0:
out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
return F.avg_pool1d(out, 2)
class DenseBlock(nn.Module):
def __init__(self, nb_layers, in_planes, growth_rate, block, dropRate=0.0):
super(DenseBlock, self).__init__()
self.layer = self._make_layer(block, in_planes, growth_rate, nb_layers, dropRate)
def _make_layer(self, block, in_planes, growth_rate, nb_layers, dropRate):
layers = []
for i in range(nb_layers):
layers.append(block(in_planes+i*growth_rate, growth_rate, dropRate))
return nn.Sequential(*layers)
def forward(self, x):
return self.layer(x)
class DenseNet(nn.Module):
def __init__(self, depth, num_classes, growth_rate=12,
reduction=0.5, bottleneck=True, dropRate=0.0): #reduction = 0.5 is compression and used 1x1 before 3x3
#therefore, Dense-BC architecture it is, k:growth_rate = 12
super(DenseNet, self).__init__()
in_planes = 2 * growth_rate
n = (depth - 4) / 3
if bottleneck == True:
n = n/2
block = BottleneckBlock
else:
block = BasicBlock
n = int(n)
print('n:',n)
# 1st conv before any dense block
#self.conv1 = nn.Conv1d(3, in_planes, kernel_size=3, stride=1,
# padding=1, bias=False)
self.conv1 = nn.Conv1d(1, in_planes, kernel_size=3, stride=1,
padding=1, bias=True)##bias=False)
# 1st block
self.block1 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
in_planes = int(in_planes+n*growth_rate)
self.trans1 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)), dropRate=dropRate)
in_planes = int(math.floor(in_planes*reduction))
# 2nd block
self.block2 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
in_planes = int(in_planes+n*growth_rate)
##self.trans2 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)), dropRate=dropRate)
##in_planes = int(math.floor(in_planes*reduction))
# 3rd block
##self.block3 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
##in_planes = int(in_planes+n*growth_rate)
# global average pooling and classifier
self.bn1 = nn.BatchNorm1d(in_planes)
self.relu = nn.SELU(inplace=True)
self.in_planes = in_planes
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.bias.data.zero_()
def getInPlanes(self):
return self.in_planes
def forward(self, x):
out = self.conv1(x)
out = self.block1(out)
out = self.trans1(out)
out = self.block2(out)
out = self.trans2(out)
out = self.block3(out)
out = self.relu(self.bn1(out))
out = F.avg_pool1d(out, 8)#(input,kernel_size)
##out = F.adaptive_avg_pool1d(out,8)
out = out.view(-1, self.in_planes)
return out
class Network(nn.Module):
def __init__(self,depth = 40,num_classes=2,growth_rate = 12,dropRate = 0.0):
super(Network, self).__init__()
print('depth:',depth)
self.DenseNet1=DenseNet(depth = depth,num_classes=num_classes,growth_rate = growth_rate,dropRate=dropRate)#3,num_classes=2)
self.DenseNet2=DenseNet(depth = depth,num_classes=num_classes,growth_rate = growth_rate,dropRate=dropRate)
self.DenseNet3=DenseNet(depth = depth,num_classes=num_classes,growth_rate = growth_rate,dropRate=dropRate)
self.DenseNet4=DenseNet(depth = depth,num_classes=num_classes,growth_rate = growth_rate,dropRate=dropRate)
self.DenseNet5=DenseNet(depth = depth,num_classes=num_classes,growth_rate = growth_rate,dropRate=dropRate)
self.in_planes = self.DenseNet1.getInPlanes()
#print('in_planes : ',self.in_planes)
self.fc = nn.Linear(self.in_planes*5, num_classes)
self.softmax=nn.Softmax()
def forward(self,x):
batch_size, no_of_series, no_of_days = x.shape
close=np.reshape(x[:,0],(batch_size,1,no_of_days))
out1=self.DenseNet1(close)
#print('*******************************************************')
#print('out1 size : ',out1.shape)
_open = np.reshape(x[:,1],(batch_size,1,no_of_days))
out2=self.DenseNet2(_open)
high = np.reshape(x[:,2],(batch_size,1,no_of_days))
out3=self.DenseNet3(high)
low = np.reshape(x[:,3],(batch_size,1,no_of_days))
out4=self.DenseNet4(low)
volume = np.reshape(x[:,4],(batch_size,1,no_of_days))
out5=self.DenseNet5(volume)
gp = torch.stack((out1,out2,out3,out4,out5),1)
gp = gp.view(gp.size(0), -1)
#print("*******************stacked tensors***************")
#print(gp)
##print('stacked tensor\'s shape : ', gp.shape)
x=self.fc(gp)
##return x
y=self.softmax(x)
return y
def train_model(epoch, model, optimizer, train_loader,cuda):
model.train()
t0 = time.time()
#test_loss = 0
correct=0
equals=0
total_correct=0
final_loss = 0
for batch_idx, (X,labels) in enumerate(train_loader):
Data,labels = map(lambda x: Variable(x), [X,labels])
optimizer.zero_grad()
data=Data
output = model(data)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device : ', device)
final_output=output.to(device)
#print(labels.size(0))
num_samples = labels.size(0)
##print('predicted labels',final_output.squeeze())
##print('Actual labels',labels.squeeze())
loss = F.cross_entropy(final_output, labels, size_average=False)
final_loss += loss
##print('Loss : ',loss)
loss.backward()
optimizer.step()
avg_loss = loss / num_samples
print('Train Epoch: {} Batch: {} [{}/{} ({:.2f}%, time:{:.2f}s)]\tLoss: {:.6f}'.format(
epoch, batch_idx, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), time.time() - t0,
avg_loss.data))
#t0 = time.time()
pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
labels = labels.to("cpu")
correct = pred.eq(labels.data.view_as(pred))
equals = torch.sum(correct)
total_correct+=equals
t0 = time.time()
samples=train_loader.dataset.total_samples
loss /= samples
final_loss /= samples
accuracy = total_correct.item()/samples
print('Training Accuracy : ',accuracy*100)
print('Training Loss : ',final_loss.item())
if epoch%20 == 0 or epoch ==1 or epoch == epochs:
epochs_list.append(epoch)
train_loss.append(final_loss.item())
train_accuracies.append(accuracy*100)
path = 'some path'
torch.save(model,path+str(epoch)+'.pth')
torch.save(model,path)
print('model saved')
def validate(epoch,model, validation_loader,optimizer):
model.eval()
test_loss = 0
correct = 0
equals=0
total_correct=0
for batch_id,(X,labels) in enumerate(validation_loader):
#if cuda:
# Data ,labels= map(lambda x:x.cuda(),[X,labels])
Data,labels = map(lambda x: Variable(x), [X,labels])
optimizer.zero_grad()
data=Data
output = model(data)
output = output.squeeze()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
final_val_output=output.to(device)
labels = labels.squeeze()
test_loss += F.cross_entropy(final_val_output, labels, size_average=False) # sum up batch loss
pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
labels = labels.to("cpu")
correct = pred.eq(labels.data.view_as(pred))
equals = torch.sum(correct)
total_correct+=equals
samples=validation_loader.dataset.total_samples
test_loss /= samples
accuracy = total_correct.item()/samples
if epoch%20 == 0 or epoch ==1 or epoch == epochs:
validation_loss.append(test_loss.item())
val_accuracies.append(accuracy*100)
print('Accuracy : ',accuracy)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
test_loss, total_correct,samples,100 * accuracy))
print("==============================================")
return "{:.4f}%".format(100. * total_correct / samples), accuracy
def test(test_loader):
path = 'some path'
print('path:',path)
model2 = torch.load(path)
print('model loaded')
model2.eval()
correct = 0
total = 0
cnt = 0
with torch.no_grad():
for data in test_loader:
cnt+=1
test_data, labels = data
outputs = model(test_data)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('cnt:',cnt)
print('Test Accuracy: {:.4f}%'.format(100 * correct / total))
X_train,Y_train,X_val,Y_val,X_test,Y_test = splitData(feat_wise_data,labels_new)
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)
epochs=100
batch_size=8
lr=0.0005##0.001##0.0005##0.001##0.0005##0.05#0.001
momentum=0.9
cuda=False
seed=1
log_interval=300
torch.manual_seed(seed)
model=Network(depth = 15,dropRate=0.3)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', factor=0.5, patience=2, verbose=True, threshold_mode='abs', threshold=0.01, min_lr=1e-6)
for epoch in range(1, epochs + 1):
train_model(epoch, model, optimizer, train_loader,cuda=True)
acc_str, acc = validate(epoch,model,validation_loader,optimizer)
scheduler.step(acc)
test(test_loader)
My plots for losses are coming out as :
unable to understand why is this happening? I have tried changing learning rates, batch size, epochs but nothing has worked, dropout rates. Losses and accuracies are not stable as well as good since there are only two classes and this seems to give random model. Please suggest as I am new to Deep learning and pytorch.