After loading the model, optimizer, and scheduler state_dicts and then saving all three again, the resulting checkpoint file is double the size of one saved without a prior load.
Running:
import torch
import torch.nn as nn
import os

device_id = 'cpu'
device = torch.device(device_id)


def train_steps(model, optimizer, steps=5, batch_size=64):
    """Run `steps` dummy training iterations so Adam materializes its
    per-parameter state (exp_avg / exp_avg_sq).

    Shapes are derived from the model instead of being hard-coded, so the
    helper works for any nn.Linear.
    """
    in_features = model.in_features
    out_features = model.out_features
    for _ in range(steps):
        optimizer.zero_grad()
        x = torch.randn(batch_size, in_features).to(device)
        out = model(x)
        # No loss function needed for the repro: backprop a random gradient.
        out.backward(torch.randn(batch_size, out_features).to(device))
        optimizer.step()


def save_checkpoint(model, optimizer, scheduler, fname):
    """Save the three state_dicts to `fname` and return the file size in bytes."""
    checkpoint = {
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
    }
    torch.save(checkpoint, fname)
    return os.path.getsize(fname)


def load_checkpoint(model, optimizer, scheduler, fname):
    """Restore the three state_dicts from `fname` in place."""
    checkpoint = torch.load(fname)
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    scheduler.load_state_dict(checkpoint['scheduler'])


def main():
    """Reproduce the checkpoint-size doubling and log sizes to filesize.log."""
    model = nn.Linear(100, 1000).to(device)
    optimizer = torch.optim.Adam(model.parameters())
    scheduler = torch.optim.lr_scheduler.CyclicLR(
        optimizer, 1e-7, 1e-4, 500, cycle_momentum=False
    )
    # Context manager guarantees the log is closed even if a step raises.
    with open('filesize.log', 'w') as f:

        def log_save(fname):
            # Same log-line format as the original script: "<fname> <bytes>\n".
            size = save_checkpoint(model, optimizer, scheduler, fname)
            f.write(f'{fname} {size}\n')

        f.write(torch.__version__ + '\n')

        log_save('linear_start.pth')
        train_steps(model, optimizer)
        log_save('linear_5steps.pth')
        train_steps(model, optimizer)
        log_save('linear_10steps.pth')

        # NOTE(review): after a load_state_dict round-trip, the re-saved file
        # doubles in size.  This looks consistent with CyclicLR pickling its
        # `scale_fn` bound method, which drags along an independent copy of
        # the optimizer/parameter tensors distinct from the live model's —
        # confirm against CyclicLR.state_dict for this torch version.
        load_checkpoint(model, optimizer, scheduler, 'linear_5steps.pth')
        log_save('linear_10steps_load.pth')
        train_steps(model, optimizer)
        log_save('linear_10steps_loadtrain.pth')

        load_checkpoint(model, optimizer, scheduler, 'linear_10steps_loadtrain.pth')
        log_save('linear_15steps_load.pth')
        train_steps(model, optimizer)
        log_save('linear_15steps_loadtrain.pth')


if __name__ == '__main__':
    main()
The output in filesize.log is:
1.10.2+cu113
linear_start.pth 406119
linear_5steps.pth 1214999
linear_10steps.pth 1214999
linear_10steps_load.pth 2428097
linear_10steps_loadtrain.pth 2428097
linear_15steps_load.pth 2428097
linear_15steps_loadtrain.pth 2428097
Similar results are found on both Ubuntu and Windows. Anyone know why this happens? Is this a bug?