when I save my checkpoint, the error occurs, how to fix it?
the config file, notice CHECKPOINT
import torch
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
CSV_DIR = 'C:/Users/PML/Documents/Florentino_space/cast_iron_preprocess/csv512_offset50'
IMG_DIR = 'C:/Users/PML/Documents/Florentino_space/cast_iron_preprocess/img512_offset50'
LEARNING_RATE = 2e-4
IMAGE_SIZE = 512
IN_CHANNELS = 1
OUT_CHANNELS = 1
BATCH_SIZE = 8
NUM_EPOCHS = 30
LOAD_MODEL = False
SAVE_MODEL = True
NUM_WORKER = 4
CHECKPOINT = 'sur_checkpoint.pth.tar'
this is my utils function:
def save_checkpoint(model, optimizer, filename="my_checkpoint.pth.tar"):
print("=> Saving checkpoint")
checkpoint = {
"state_dict": model.state_dict(),
"optimizer": optimizer.state_dict(),
}
torch.save(checkpoint, filename)
from model import AutoEncoder
import torch
from dataset import SurDataset
from utils import save_checkpoint, load_checkpoint, save_some_examples
from torch.utils.data import DataLoader
from torchvision.utils import save_image
import torch.nn as nn
import torch.optim as optim
import config
from tqdm import tqdm
model = AutoEncoder(in_channels=1, out_channels=1).to(config.DEVICE)
optimizer = optim.Adam(list(model.parameters()),
lr = config.LEARNING_RATE,
betas=(0.5, 0.999))
mse = nn.MSELoss()
scalar = torch.cuda.amp.GradScaler()
dataset = SurDataset(csv_dir=config.CSV_DIR, img_dir=config.IMG_DIR)
train_set, val_set = torch.utils.data.random_split(dataset, [15000, len(dataset)-15000])
loader = DataLoader(dataset=train_set, batch_size=config.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=config.BATCH_SIZE)
loop = tqdm(loader)
loop = tqdm(loader)
if config.LOAD_MODEL:
load_checkpoint(
config.CHECKPOINT, model, optimizer, config.LEARNING_RATE,
)
model.train()
for epoch in range(config.NUM_EPOCHS):
print(f"epoch {epoch+1}/{config.NUM_EPOCHS}:")
losses = []
for idx, (csv_, target) in enumerate(loop):
csv_ = csv_.to(config.DEVICE)
target = target.to(config.DEVICE)
predict = model(csv_)
loss = mse(predict, target)
losses.append(loss.item())
optimizer.zero_grad()
loss.backward()
# gradient descent or adam step
optimizer.step()
if idx % 5 == 0:
save_some_examples(model, val_loader, epoch, folder="evaluation")
if config.SAVE_MODEL and epoch % 5 == 0:
save_checkpoint(model, optimizer, filename=config.CHECKPOINT) # here come the error!
print(f"loss at epoch {epoch+1}/{config.NUM_EPOCHS} is {losses:.4f}.")