Hi everyone,
I’ve been trying to run StyleGAN2-ADA (PyTorch) in a virtual environment with the following setup:
- OS: Windows 10
- GPU: RTX 3060
- CUDA: 11.3
- torch: 1.9.0+cu111
- torchvision: 0.10.0+cu111
and the following error appears:
```
(stylegan-pytorch) C:\Users\zas47\temp\stylegan2-ada-pytorch>python train.py --outdir C:\Users\zas47\temp\stylegan2-ada-pytorch\datasets\output --data C:\Users\zas47\temp\stylegan2-ada-pytorch\datasets\converted64 --snap 3 --batch 8
Training options:
{
  "num_gpus": 1,
  "image_snapshot_ticks": 3,
  "network_snapshot_ticks": 3,
  "metrics": [
    "fid50k_full"
  ],
  "random_seed": 0,
  "training_set_kwargs": {
    "class_name": "training.dataset.ImageFolderDataset",
    "path": "C:\\Users\\zas47\\temp\\stylegan2-ada-pytorch\\datasets\\converted64",
    "use_labels": false,
    "max_size": 1000,
    "xflip": false,
    "resolution": 64
  },
  "data_loader_kwargs": {
    "pin_memory": true,
    "num_workers": 3,
    "prefetch_factor": 2
  },
  "G_kwargs": {
    "class_name": "training.networks.Generator",
    "z_dim": 512,
    "w_dim": 512,
    "mapping_kwargs": {
      "num_layers": 2
    },
    "synthesis_kwargs": {
      "channel_base": 16384,
      "channel_max": 512,
      "num_fp16_res": 4,
      "conv_clamp": 256
    }
  },
  "D_kwargs": {
    "class_name": "training.networks.Discriminator",
    "block_kwargs": {},
    "mapping_kwargs": {},
    "epilogue_kwargs": {
      "mbstd_group_size": 4
    },
    "channel_base": 16384,
    "channel_max": 512,
    "num_fp16_res": 4,
    "conv_clamp": 256
  },
  "G_opt_kwargs": {
    "class_name": "torch.optim.Adam",
    "lr": 0.0025,
    "betas": [
      0,
      0.99
    ],
    "eps": 1e-08
  },
  "D_opt_kwargs": {
    "class_name": "torch.optim.Adam",
    "lr": 0.0025,
    "betas": [
      0,
      0.99
    ],
    "eps": 1e-08
  },
  "loss_kwargs": {
    "class_name": "training.loss.StyleGAN2Loss",
    "r1_gamma": 0.0256
  },
  "total_kimg": 25000,
  "batch_size": 8,
  "batch_gpu": 8,
  "ema_kimg": 10.0,
  "ema_rampup": 0.05,
  "ada_target": 0.6,
  "augment_kwargs": {
    "class_name": "training.augment.AugmentPipe",
    "xflip": 1,
    "rotate90": 1,
    "xint": 1,
    "scale": 1,
    "rotate": 1,
    "aniso": 1,
    "xfrac": 1,
    "brightness": 1,
    "contrast": 1,
    "lumaflip": 1,
    "hue": 1,
    "saturation": 1
  },
  "run_dir": "C:\\Users\\zas47\\temp\\stylegan2-ada-pytorch\\datasets\\output\\00005-converted64-auto1-batch8"
}
Output directory: C:\Users\zas47\temp\stylegan2-ada-pytorch\datasets\output\00005-converted64-auto1-batch8
Training data: C:\Users\zas47\temp\stylegan2-ada-pytorch\datasets\converted64
Training duration: 25000 kimg
Number of GPUs: 1
Number of images: 1000
Image resolution: 64
Conditional model: False
Dataset x-flips: False
Creating output directory...
Launching processes...
Loading training set...
Num images: 1000
Image shape: [3, 64, 64]
Label shape: [0]
Constructing networks...
Traceback (most recent call last):
  File "train.py", line 538, in <module>
    main() # pylint: disable=no-value-for-parameter
  File "C:\Users\zas47\Anaconda3\envs\stylegan-pytorch\lib\site-packages\click\core.py", line 1130, in __call__
    return self.main(*args, **kwargs)
  File "C:\Users\zas47\Anaconda3\envs\stylegan-pytorch\lib\site-packages\click\core.py", line 1055, in main
    rv = self.invoke(ctx)
  File "C:\Users\zas47\Anaconda3\envs\stylegan-pytorch\lib\site-packages\click\core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "C:\Users\zas47\Anaconda3\envs\stylegan-pytorch\lib\site-packages\click\core.py", line 760, in invoke
    return __callback(*args, **kwargs)
  File "C:\Users\zas47\Anaconda3\envs\stylegan-pytorch\lib\site-packages\click\decorators.py", line 26, in new_func
    return f(get_current_context(), *args, **kwargs)
  File "train.py", line 531, in main
    subprocess_fn(rank=0, args=args, temp_dir=temp_dir)
  File "train.py", line 383, in subprocess_fn
    training_loop.training_loop(rank=rank, **args)
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\training\training_loop.py", line 166, in training_loop
    img = misc.print_module_summary(G, [z, c])
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\torch_utils\misc.py", line 209, in print_module_summary
    outputs = module(*inputs)
  File "C:\Users\zas47\Anaconda3\envs\stylegan-pytorch\lib\site-packages\torch\nn\modules\module.py", line 1071, in _call_impl
    result = forward_call(*input, **kwargs)
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\training\networks.py", line 499, in forward
    img = self.synthesis(ws, **synthesis_kwargs)
  File "C:\Users\zas47\Anaconda3\envs\stylegan-pytorch\lib\site-packages\torch\nn\modules\module.py", line 1071, in _call_impl
    result = forward_call(*input, **kwargs)
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\training\networks.py", line 471, in forward
    x, img = block(x, img, cur_ws, **block_kwargs)
  File "C:\Users\zas47\Anaconda3\envs\stylegan-pytorch\lib\site-packages\torch\nn\modules\module.py", line 1071, in _call_impl
    result = forward_call(*input, **kwargs)
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\training\networks.py", line 398, in forward
    x = self.conv1(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs)
  File "C:\Users\zas47\Anaconda3\envs\stylegan-pytorch\lib\site-packages\torch\nn\modules\module.py", line 1071, in _call_impl
    result = forward_call(*input, **kwargs)
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\training\networks.py", line 299, in forward
    x = modulated_conv2d(x=x, weight=self.weight, styles=styles, noise=noise, up=self.up,
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\torch_utils\misc.py", line 98, in decorator
    return fn(*args, **kwargs)
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\training\networks.py", line 65, in modulated_conv2d
    x = conv2d_resample.conv2d_resample(x=x, w=weight.to(x.dtype), f=resample_filter, up=up, down=down, padding=padding, flip_weight=flip_weight)
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\torch_utils\misc.py", line 98, in decorator
    return fn(*args, **kwargs)
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\torch_utils\ops\conv2d_resample.py", line 147, in conv2d_resample
    return _conv2d_wrapper(x=x, w=w, padding=[py0,px0], groups=groups, flip_weight=flip_weight)
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\torch_utils\ops\conv2d_resample.py", line 54, in _conv2d_wrapper
    return op(x, w, stride=stride, padding=padding, groups=groups)
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\torch_utils\ops\conv2d_gradfix.py", line 37, in conv2d
    return _conv2d_gradfix(transpose=False, weight_shape=weight.shape, stride=stride, padding=padding, output_padding=0, dilation=dilation, groups=groups).apply(input, weight, bias)
  File "C:\Users\zas47\temp\stylegan2-ada-pytorch\torch_utils\ops\conv2d_gradfix.py", line 112, in forward
    output = torch.nn.functional.conv2d(input=input, weight=weight, bias=bias, **common_kwargs)
RuntimeError: CUDA out of memory. Tried to allocate 1.10 GiB (GPU 0; 12.00 GiB total capacity; 332.11 MiB already allocated; 9.43 GiB free; 364.00 MiB reserved in total by PyTorch)
```
I’ve tried several of the workarounds for this error mentioned in issue 16417, such as:
- Decreasing the batch size (I went down to --batch 1, but it still fails).
- Calling torch.cuda.empty_cache().
- Calling gc.collect() to free unused variables (see the sketch below for roughly where I added these calls).
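
For reference, this is approximately how I added those last two calls; the exact placement inside training/training_loop.py was my own experiment, so treat it as a sketch rather than the precise diff:

```python
import gc
import torch

# Added before network construction in training/training_loop.py
# (placement is approximate; I tried a few different spots):
gc.collect()               # drop unreferenced Python objects
torch.cuda.empty_cache()   # return cached, unused blocks to the CUDA driver
```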
What I don’t understand is this: the error message itself says 9.43 GiB is free, so if there is that much free GPU memory, why is this error occurring at all?
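
In case it helps diagnose, here is a minimal snippet I can run in the same environment (plain torch.cuda calls, nothing StyleGAN-specific) to inspect what PyTorch's caching allocator reports; the "already allocated" and "reserved" figures in the error message come from this allocator:

```python
import torch

# Memory actually handed out to live tensors vs. memory the caching
# allocator is holding in reserve on GPU 0:
print(f"allocated: {torch.cuda.memory_allocated() / 2**20:.1f} MiB")
print(f"reserved:  {torch.cuda.memory_reserved() / 2**20:.1f} MiB")
print(torch.cuda.memory_summary())  # detailed per-pool breakdown
```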
Your help is really appreciated!