TensorBoard with PyTorch

My first attempt at visualizing with TensorBoard.

I built a convnet with the nn.Module class -


import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


class ThreeLayerConvNet2(nn.Module):
    def __init__(self, in_channel, channel_1, channel_2, num_classes):
        super().__init__()

        pad1 = 2
        pad2 = 1
        ker1 = (5, 5)
        ker2 = (3, 3)
        # stride-1 conv output size: H + 2 * pad - kernel + 1 (input is 32x32 CIFAR-10)
        feature_map1_size = (32 + 2 * pad1 - ker1[0] + 1, 32 + 2 * pad1 - ker1[1] + 1)
        feature_map2_size = (feature_map1_size[0] + 2 * pad2 - ker2[0] + 1,
                             feature_map1_size[1] + 2 * pad2 - ker2[1] + 1)
        flat_size = int(np.prod(feature_map2_size)) * channel_2

        self.conv1 = nn.Conv2d(in_channel, channel_1, kernel_size=ker1, padding=pad1,
                               padding_mode='zeros')
        nn.init.kaiming_normal_(self.conv1.weight)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(channel_1, channel_2, kernel_size=ker2, padding=pad2,
                               padding_mode='zeros')
        nn.init.kaiming_normal_(self.conv2.weight)
        self.fc = nn.Linear(flat_size, num_classes)
        #self.batchnorm2d_1 = nn.BatchNorm2d(in_channel)
        #self.batchnorm2d_2 = nn.BatchNorm2d(channel_1)
        #self.batchnorm1d = nn.BatchNorm1d(flat_size)

    def forward(self, x):
        #x = self.batchnorm2d_1(x)
        x = self.conv1(x)
        x = self.relu(x)
        #x = self.batchnorm2d_2(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = x.flatten(start_dim=1)  # cs231n's flatten helper: keep the batch dim, flatten the rest
        #x = self.batchnorm1d(x)
        scores = self.fc(x)
        return scores

num_classes = 10
in_channel = 3
channel_1 = 32
channel_2 = 16
learning_rate = 1e-2

model = ThreeLayerConvNet2(in_channel, channel_1, channel_2, num_classes)

optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                     momentum=0.9, nesterov=True)
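As a quick sanity check of the size arithmetic (a minimal sketch, assuming CIFAR-10 inputs of shape (N, 3, 32, 32), which is what the hard-coded 32s assume):

dummy = torch.zeros(64, 3, 32, 32)   # CIFAR-10-shaped dummy batch
scores = model(dummy)
print(scores.shape)                  # expected: torch.Size([64, 10])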

Then I initialized TensorBoard -

from torch.utils.tensorboard import SummaryWriter

# default `log_dir` is "runs" - we'll be more specific here
writer = SummaryWriter('cs231n/final_cifar10_convnet')

and tried to feed the board a graph, as shown in the tutorial -

dataiter = iter(loader_train)
images, labels = dataiter.next()
writer.add_graph(ThreeLayerConvNet2,images)

and got the following error.

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-73-49d61804c4c3> in <module>
     1 dataiter = iter(loader_train)
     2 images, labels = dataiter.next()
----> 3 writer.add_graph(ThreeLayerConvNet2)

~\.conda\envs\torch_env\lib\site-packages\torch\utils\tensorboard\writer.py in add_graph(self, model, input_to_model, verbose)
   705         if hasattr(model, 'forward'):
   706             # A valid PyTorch model should have a 'forward' method
--> 707             self._get_file_writer().add_graph(graph(model, input_to_model, verbose))
   708         else:
   709             # Caffe2 models do not have the 'forward' method

~\.conda\envs\torch_env\lib\site-packages\torch\utils\tensorboard\_pytorch_graph.py in graph(model, args, verbose)
   281         processing.
   282     """
--> 283     with torch.onnx.set_training(model, False):  # TODO: move outside of torch.onnx?
   284         try:
   285             trace = torch.jit.trace(model, args)

~\.conda\envs\torch_env\lib\contextlib.py in __enter__(self)
    79     def __enter__(self):
    80         try:
---> 81             return next(self.gen)
    82         except StopIteration:
    83             raise RuntimeError("generator didn't yield") from None

~\.conda\envs\torch_env\lib\site-packages\torch\onnx\utils.py in set_training(model, mode)
    36         yield
    37         return
---> 38     old_mode = model.training
    39     if old_mode != mode:
    40         model.train(mode)

AttributeError: type object 'ThreeLayerConvNet2' has no attribute 'training'
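Reading the traceback again, it looks like add_graph received the class ThreeLayerConvNet2 itself rather than the instantiated model, which would explain why the type object has no training attribute. A minimal sketch of what the call presumably should be, reusing the model and loader_train from above:

dataiter = iter(loader_train)
images, labels = dataiter.next()
writer.add_graph(model, images)   # pass the model instance, not the class
writer.close()                    # flush the event file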

Hi,

What's your PyTorch version? :smiley:
Is it possible to upgrade to 1.6 or a nightly version?

Currently not, because I'm doing Stanford's cs231n course and 1.4 is required there. Maybe it's not a big deal; I'll try to upgrade and see.

For now I'm simply trying to follow the PyTorch TensorBoard tutorial, and it's not working.

More specifically, the TensorBoard web page starts but I can't upload an image.
Details:
conda 4.8.5 environment
Windows 10
Python 3.6.11
PyTorch 1.6
TensorBoard 1.15
PyCharm IDE

I tried upgrading tensorboard and tensorboardX.

Steps:
I run the program as the tutorial shows, in PyCharm.

(Not sure it's critical, but I'm new to PyCharm and to module-based programming. Maybe I did something wrong there? Full code added below.)

In the command line I activate the torch environment, change directory to the PyCharm project/runs folder, and then execute:

 tensorboard --logdir==runs

I get the link to the localhost web page and see the TensorBoard GUI, but no image data is found. Nothing is found, for that matter, but as a first step I'm just trying to upload an image, so nothing works.
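Two things in that invocation look suspicious to me, though I'm not sure either is the actual culprit: the double equals sign in --logdir==runs makes TensorBoard look for a directory literally named '=runs', and since the command is run from inside the runs folder itself, a logdir of runs would point at runs/runs. A sketch of what I would try instead, with <project> standing in for the PyCharm project folder:

cd <project>
tensorboard --logdir=runs    # single '=' (or: tensorboard --logdir runs)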

PyCharm code:
test.py


import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
from Net import Net
from data_load import classes, trainloader, testloader
from helper_func import matplotlib_imshow
from board import writer

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

if __name__ == "__main__":
    # default `log_dir` is "runs" - we'll be more specific here
    # get some random training images
    dataiter = iter(trainloader)
    images, labels = dataiter.next()
    print(images.shape)
    # create grid of images
    img_grid = torchvision.utils.make_grid(images)

    # show images
    matplotlib_imshow(img_grid, one_channel=True)

    # write to tensorboard
    writer.add_image('four_fashion_mnist_images', img_grid)
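One more thing I would try here, as a guess, since the event files do get created but nothing shows up: flush or close the writer right after the add_image call, so the pending summary is actually written to disk before the script exits:

    writer.flush()   # force pending summaries to disk
    writer.close()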

board.py

from torch.utils.tensorboard import SummaryWriter
#from tensorboardX import SummaryWriter

writer = SummaryWriter('runs/fashion_mnist_experiment_1')

data_load.py

import torchvision
import torch
from helper_func import transform


# constant for classes
classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
        'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')


# datasets
trainset = torchvision.datasets.FashionMNIST('./data',
    download=True,
    train=True,
    transform=transform)
testset = torchvision.datasets.FashionMNIST('./data',
    download=True,
    train=False,
    transform=transform)

# dataloaders
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                        shuffle=True, num_workers=2)


testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                        shuffle=False, num_workers=2)

helper_func.py

import matplotlib.pyplot as plt
import numpy as np
import torchvision.transforms as transforms

# helper function to show an image
# (used in the `plot_classes_preds` function below)
def matplotlib_imshow(img, one_channel=False):
    if one_channel:
        img = img.mean(dim=0)
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
    else:
        plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# transforms
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))])

Net.py

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
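A quick shape check of Net (just a sketch, assuming 28x28 FashionMNIST inputs: 28 -> 24 after conv1, 12 after pooling, 8 after conv2, 4 after pooling, hence the 16 * 4 * 4):

import torch

net = Net()
out = net(torch.zeros(4, 1, 28, 28))   # FashionMNIST-shaped dummy batch
print(out.shape)                        # expected: torch.Size([4, 10])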

I mention all this code because so far I have done all my programming in notebooks, so I'm still confused about the order in which Python interprets all these modules, and maybe that caused an error.

Hi,

Can you check whether a file is created in your ../runs/fashion_mnist_experiment_1 path?
But honestly your code looks right :confused:
I hope someone else can help you!

Yes, in the folder fashion_mnist_experiment_1 I find many similar files; I guess they are generated every time I run the code.
File name: events.out.tfevents.1602411148.DESKTOP-9IVI3D

There are 2 files whose type is DESKTOP_9IVI3MD,
and the rest are '0 file'.

No clue what that means…

Another strange detail that might contribute: there seems to be an issue with the order in which the Python commands run.

I added two print commands, one in the main script and one in the board module. The code changed only where I mention it; the rest is the same:

import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
from Net import Net
from data_load import classes, trainloader, testloader
from helper_func import matplotlib_imshow
print('writer not') # change 1
from board import writer

and in the board.py module:

from torch.utils.tensorboard import SummaryWriter
#from tensorboardX import SummaryWriter
writer = SummaryWriter('runs/fashion_mnist_experiment_1')
print('writer ready') # change 2

When I run it, the following is printed:

writer not
writer ready
writer not
writer not
writer ready
writer ready
torch.Size([4, 1, 28, 28])

It seems to be repeating the main script before proceeding to the if __name__ == '__main__' statement, and also strangely doubling the prints on the second repeat.
What can cause this, and is it perhaps related to the TensorBoard dysfunction?

*Update:
It seems that num_workers=2 on the torch.utils.data.DataLoader objects in the data_load module caused the double-printing issue. After changing to num_workers=1:

writer not
writer ready
writer not
writer ready
torch.Size([4, 1, 28, 28])
writer yes

And after changing to num_workers=0 I got the printout I was expecting:

writer not
writer ready
torch.Size([4, 1, 28, 28])
writer yes

I still don't know why this happens or how it is related to the TensorBoard issue.
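One possible explanation for the repeated printing, at least (a guess on my part): on Windows, DataLoader worker processes are started with the spawn method, and each spawned worker re-imports the main script, so every module-level statement, including the prints and the SummaryWriter creation, runs again once per worker. That would explain why num_workers=2 doubled the prints and num_workers=0 removed them. The usual pattern is to keep module-level code minimal and put the entry point behind the main guard, roughly:

if __name__ == "__main__":
    # only the parent process runs this block; spawned DataLoader workers
    # re-import the module but skip everything under the guard
    run_training()   # hypothetical wrapper around the code currently in test.py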

I have a CUDA-enabled GPU (GTX 1050).

Can you put the from board import writer inside the if __name__ == '__main__': block and try it again?

I inserted the from board import writer into the if __name__ block.

I ran the code and it seems fine, except it still prints the print command inside board (from what I understand, if I import writer from board it should ignore the print command inside board, no?)

  1. I open the Anaconda prompt, activate the venv, and change directory to pycharmprojects/deeplearning/runs

  2. I type tensorboard --logdir==runs

and get this result:
TensorBoard 1.15.0 at http://DESKTOP-9IVI3MD:6006/ (Press CTRL+C to quit)

  3. I enter the link; still no image. I also tried adding graphs, to no avail.

The folder runs/fashion_mnist_experiment_1 contains the tfevents files.