I’m using JupyterLab and prototyping some model architectures. I’m using the magic command
%lprun -f train_loop train_loop(model,X_train,y_train,X_test = X_test, y_test=y_test,**training_params)
to try to locate inefficiencies. Here is the issue: when I comment out the lines that are the largest offenders in compute time, other lines change drastically and become the new largest offenders.
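For context, the setup is nothing exotic; it looks roughly like the cell below. The values in training_params are placeholders rather than my real hyperparameters, but the keys are the ones train_loop actually reads via params.get().
%load_ext line_profiler

# Placeholder hyperparameters -- keys match what train_loop reads via params.get(),
# values here are illustrative only, not my real settings.
training_params = dict(
    SEQ_LEN=64, STRIDE=1, LOCK_GPU=True,
    BATCH_SIZE=512, LR_INIT=1e-3,
    SCHEDULE_PARAMS=dict(factor=0.5, patience=10),
    N_epoch=100, SMOOTH_ALPHA=0.9,
    NEEDS_MASK=False, PATIENCE=200, TOLERANCE=0.01,
    TEST_EVERY=50, PRINT_EVERY=100,
)

%lprun -f train_loop train_loop(model, X_train, y_train, X_test=X_test, y_test=y_test, **training_params)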
For example, here is the first run. Of interest are lines 69 and 95; we will see that 95 changes hugely after commenting out 69.
Function: train_loop at line 11
Line # Hits Time Per Hit % Time Line Contents
11 def train_loop(model,X,y,
12 RANDOM = True,
13 X_test = None,
14 y_test = None,
15 **params):
16
17 1 770.0 770.0 0.0 if not RANDOM:
18 torch.manual_seed(0)
19 else:
20 1 78762.0 78762.0 0.0 torch.manual_seed(datetime.now().microsecond + datetime.now().second*1000000)
21
22 1 29001.0 29001.0 0.0 device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
23
24 1 1221127.0 1e+06 0.0 dataset = SequenceDataset(X = X, y = y, seq_len = params.get('SEQ_LEN'), stride = params.get('STRIDE'), lock_gpu = params.get('LOCK_GPU') )
25 1 77002.0 77002.0 0.0 loader = DataLoader( dataset, batch_size = params.get('BATCH_SIZE'), shuffle = True,)
26 1 3950.0 3950.0 0.0 loader = DeviceDataLoader( loader, device = device )
27
28 1 240.0 240.0 0.0 if X_test is not None and y_test is not None:
29 1 698361.0 698361.0 0.0 test_dataset = SequenceDataset(X = X_test, y = y_test, seq_len = params.get('SEQ_LEN'), stride = params.get('STRIDE'),lock_gpu = params.get('LOCK_GPU') )
30 1 35281.0 35281.0 0.0 test_loader = DataLoader( test_dataset, batch_size = params.get('BATCH_SIZE'), shuffle = True,)
31 1 1350.0 1350.0 0.0 test_loader = DeviceDataLoader( test_loader, device = device )
32
33 1 70002.0 70002.0 0.0 loss_fn = nn.MSELoss()
34 1 267578.0 267578.0 0.0 optimizer = torch.optim.Adam( model.parameters() , lr = params.get('LR_INIT'))
35 1 18710.0 18710.0 0.0 scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, **params.get('SCHEDULE_PARAMS') )
36
37 1 190.0 190.0 0.0 losses = []
38 1 130.0 130.0 0.0 test_losses = []
39 1 130.0 130.0 0.0 lrs = []
40
41
42 1 190.0 190.0 0.0 smooth_loss = 1e12
43 1 170.0 170.0 0.0 best_loss = 1e12
44 1 110.0 110.0 0.0 best_test_loss = 1e12
45 1 130.0 130.0 0.0 test_loss = 1e12
46 1 130.0 130.0 0.0 count_train = 0
47 1 120.0 120.0 0.0 count_test = 0
48 101 42852986.0 424287.0 0.8 for epoch in pbar(range(params.get('N_epoch')),'training'):
49
50 100 8369447.0 83694.5 0.2 model.train()
51 200 62210864.0 311054.3 1.2 for X_batch,y_batch in loader: ## learn with the batches
52
53 100 79356.0 793.6 0.0 if params.get('NEEDS_MASK'):
54 mask = torch.ones( X_batch.shape[0],X_batch.shape[1] ) > 0
55 yp = model( X_batch, mask.to(device)) ## add the mask for transformer model
56
57 else:
58 100 45856456.0 458564.6 0.9 yp = model(X_batch)
59 100 6112924.0 61129.2 0.1 loss = loss_fn(y_batch,yp)
60 100 107527048.0 1e+06 2.0 loss.backward()
61 100 42534554.0 425345.5 0.8 optimizer.step()
62 100 10723904.0 107239.0 0.2 optimizer.zero_grad()
63
64 100 4447701.0 44477.0 0.1 smooth_loss = params.get('SMOOTH_ALPHA') * smooth_loss + (1-params.get('SMOOTH_ALPHA')) * loss.data
65 100 149175.0 1491.8 0.0 losses.append(loss.data)
66 100 88152.0 881.5 0.0 lrs.append( optimizer.param_groups[0]['lr'])
67
68 # updates
69 100 4964876500.0 5e+07 93.0 scheduler.step( smooth_loss )
70
71 100 44220.0 442.2 0.0 count_train += 1
72 100 19840.0 198.4 0.0 count_test += 1
73 100 82274.0 822.7 0.0 if count_train > params.get('PATIENCE') > 0:
74 print('early stopping at epoch: {} because train loss not improving'.format(epoch+1))
75 break
76 100 38500.0 385.0 0.0 if count_test > params.get('PATIENCE') > 0:
77 print('early stopping at epoch: {} because test loss not improving'.format(epoch+1))
78 break
79
80
81 100 24980.0 249.8 0.0 if X_test is not None and y_test is not None:
82 100 33551.0 335.5 0.0 test_losses = []
83 100 58851.0 588.5 0.0 if not epoch % params.get('TEST_EVERY'): ## only do the comparison with eval test loop once every so often for added speed
84 2 13111.0 6555.5 0.0 with torch.no_grad():
85 2 546046.0 273023.0 0.0 for X_foo,y_foo in test_loader:
86 1 610.0 610.0 0.0 if params.get('NEEDS_MASK'):
87 mask = torch.ones( X_foo.shape[0],X_foo.shape[1] ) > 0
88 yp = model( X_foo, mask.to(device)) ## add the mask for transformer model
89 else:
90 1 401722.0 401722.0 0.0 yp = model(X_foo)
91 1 70632.0 70632.0 0.0 loss = loss_fn(y_foo,yp)
92 1 18461.0 18461.0 0.0 test_losses.append( loss.data * X_foo.shape[0] ) ## scale by batchsize to account for the final non-full batch
93 1 43752.0 43752.0 0.0 test_loss = sum(test_losses) / len( test_dataset )
94 1 550.0 550.0 0.0 test_losses.append(test_loss)
95 100 33478558.0 334785.6 0.6 if smooth_loss < (1-params.get('TOLERANCE'))*best_loss:
96 100 122072.0 1220.7 0.0 best_loss = smooth_loss
97 100 20541.0 205.4 0.0 count_train = 0
98
99 100 3230168.0 32301.7 0.1 if test_loss < (1-params.get('TOLERANCE'))*best_test_loss:
100 1 180.0 180.0 0.0 best_test_loss = test_loss
101 1 160.0 160.0 0.0 count_test = 0
102
103 100 95133.0 951.3 0.0 if not( epoch % params.get('PRINT_EVERY')):
104 1 44571.0 44571.0 0.0 string = 'epoch {:} of {:}. \n\tTrain RMSE: {:.6f}. '.format(epoch+1,params.get('N_epoch'),smooth_loss**0.5)
105 1 42052.0 42052.0 0.0 string += 'Early stop train RMSE: {:.6f}.'.format( (1-params.get('TOLERANCE'))*best_loss**0.5 )
106 1 1500.0 1500.0 0.0 string += ' Counter at {} of {}'.format(count_train, params.get('PATIENCE'))
107 1 270.0 270.0 0.0 if X_test is not None and y_test is not None:
108 1 63182.0 63182.0 0.0 string += '\n\tTest RMSE: {:.6f}. Early stop test RMSE: {:.6f}'.format( test_loss**0.5, (1-params.get('TOLERANCE'))*best_test_loss**0.5 )
109 1 1300.0 1300.0 0.0 string += '. Counter at {} of {}'.format(count_test, params.get('PATIENCE'))
110 1 196216.0 196216.0 0.0 tqdm.write(string)
111
112 2 1280.0 640.0 0.0 meta_data = dict( losses = losses,
113 1 130.0 130.0 0.0 lrs = lrs,
114 1 110.0 110.0 0.0 test_losses = test_losses)
115 1 220.0 220.0 0.0 return model,meta_data
Then I comment out line 69, run it again, and get:
Timer unit: 1e-09 s
Total time: 5.32486 s
File: /home/***/neural_helpers/trainer.py
Function: train_loop at line 11
Line # Hits Time Per Hit % Time Line Contents
11 def train_loop(model,X,y,
12 RANDOM = True,
13 X_test = None,
14 y_test = None,
15 **params):
16
17 1 961.0 961.0 0.0 if not RANDOM:
18 torch.manual_seed(0)
19 else:
20 1 85242.0 85242.0 0.0 torch.manual_seed(datetime.now().microsecond + datetime.now().second*1000000)
21
22 1 33161.0 33161.0 0.0 device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
23
24 1 1321680.0 1e+06 0.0 dataset = SequenceDataset(X = X, y = y, seq_len = params.get('SEQ_LEN'), stride = params.get('STRIDE'), lock_gpu = params.get('LOCK_GPU') )
25 1 76352.0 76352.0 0.0 loader = DataLoader( dataset, batch_size = params.get('BATCH_SIZE'), shuffle = True,)
26 1 4021.0 4021.0 0.0 loader = DeviceDataLoader( loader, device = device )
27
28 1 220.0 220.0 0.0 if X_test is not None and y_test is not None:
29 1 709931.0 709931.0 0.0 test_dataset = SequenceDataset(X = X_test, y = y_test, seq_len = params.get('SEQ_LEN'), stride = params.get('STRIDE'),lock_gpu = params.get('LOCK_GPU') )
30 1 35661.0 35661.0 0.0 test_loader = DataLoader( test_dataset, batch_size = params.get('BATCH_SIZE'), shuffle = True,)
31 1 1420.0 1420.0 0.0 test_loader = DeviceDataLoader( test_loader, device = device )
32
33 1 38511.0 38511.0 0.0 loss_fn = nn.MSELoss()
34 1 276789.0 276789.0 0.0 optimizer = torch.optim.Adam( model.parameters() , lr = params.get('LR_INIT'))
35 1 22410.0 22410.0 0.0 scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, **params.get('SCHEDULE_PARAMS') )
36
37 1 220.0 220.0 0.0 losses = []
38 1 170.0 170.0 0.0 test_losses = []
39 1 140.0 140.0 0.0 lrs = []
40
41
42 1 180.0 180.0 0.0 smooth_loss = 1e12
43 1 140.0 140.0 0.0 best_loss = 1e12
44 1 130.0 130.0 0.0 best_test_loss = 1e12
45 1 120.0 120.0 0.0 test_loss = 1e12
46 1 140.0 140.0 0.0 count_train = 0
47 1 130.0 130.0 0.0 count_test = 0
48 101 42798216.0 423744.7 0.8 for epoch in pbar(range(params.get('N_epoch')),'training'):
49
50 100 8726028.0 87260.3 0.2 model.train()
51 200 59525744.0 297628.7 1.1 for X_batch,y_batch in loader: ## learn with the batches
52
53 100 75803.0 758.0 0.0 if params.get('NEEDS_MASK'):
54 mask = torch.ones( X_batch.shape[0],X_batch.shape[1] ) > 0
55 yp = model( X_batch, mask.to(device)) ## add the mask for transformer model
56
57 else:
58 100 46154101.0 461541.0 0.9 yp = model(X_batch)
59 100 6101912.0 61019.1 0.1 loss = loss_fn(y_batch,yp)
60 100 108411826.0 1e+06 2.0 loss.backward()
61 100 42584990.0 425849.9 0.8 optimizer.step()
62 100 10716192.0 107161.9 0.2 optimizer.zero_grad()
63
64 100 4460530.0 44605.3 0.1 smooth_loss = params.get('SMOOTH_ALPHA') * smooth_loss + (1-params.get('SMOOTH_ALPHA')) * loss.data
65 100 150536.0 1505.4 0.0 losses.append(loss.data)
66 100 91022.0 910.2 0.0 lrs.append( optimizer.param_groups[0]['lr'])
67
68 # updates
69 # scheduler.step( smooth_loss )
70
71 100 46360.0 463.6 0.0 count_train += 1
72 100 19401.0 194.0 0.0 count_test += 1
73 100 73262.0 732.6 0.0 if count_train > params.get('PATIENCE') > 0:
74 print('early stopping at epoch: {} because train loss not improving'.format(epoch+1))
75 break
76 100 36710.0 367.1 0.0 if count_test > params.get('PATIENCE') > 0:
77 print('early stopping at epoch: {} because test loss not improving'.format(epoch+1))
78 break
79
80
81 100 27200.0 272.0 0.0 if X_test is not None and y_test is not None:
82 100 30381.0 303.8 0.0 test_losses = []
83 100 60401.0 604.0 0.0 if not epoch % params.get('TEST_EVERY'): ## only do the comparison with eval test loop once every so often for added speed
84 2 11220.0 5610.0 0.0 with torch.no_grad():
85 2 536716.0 268358.0 0.0 for X_foo,y_foo in test_loader:
86 1 600.0 600.0 0.0 if params.get('NEEDS_MASK'):
87 mask = torch.ones( X_foo.shape[0],X_foo.shape[1] ) > 0
88 yp = model( X_foo, mask.to(device)) ## add the mask for transformer model
89 else:
90 1 415153.0 415153.0 0.0 yp = model(X_foo)
91 1 70522.0 70522.0 0.0 loss = loss_fn(y_foo,yp)
92 1 18871.0 18871.0 0.0 test_losses.append( loss.data * X_foo.shape[0] ) ## scale by batchsize to account for the final non-full batch
93 1 41061.0 41061.0 0.0 test_loss = sum(test_losses) / len( test_dataset )
94 1 550.0 550.0 0.0 test_losses.append(test_loss)
95 100 4986709509.0 5e+07 93.6 if smooth_loss < (1-params.get('TOLERANCE'))*best_loss:
96 100 130371.0 1303.7 0.0 best_loss = smooth_loss
97 100 22080.0 220.8 0.0 count_train = 0
98
99 100 3764560.0 37645.6 0.1 if test_loss < (1-params.get('TOLERANCE'))*best_test_loss:
100 1 210.0 210.0 0.0 best_test_loss = test_loss
101 1 160.0 160.0 0.0 count_test = 0
102
103 100 104125.0 1041.2 0.0 if not( epoch % params.get('PRINT_EVERY')):
104 1 47252.0 47252.0 0.0 string = 'epoch {:} of {:}. \n\tTrain RMSE: {:.6f}. '.format(epoch+1,params.get('N_epoch'),smooth_loss**0.5)
105 1 42021.0 42021.0 0.0 string += 'Early stop train RMSE: {:.6f}.'.format( (1-params.get('TOLERANCE'))*best_loss**0.5 )
106 1 2220.0 2220.0 0.0 string += ' Counter at {} of {}'.format(count_train, params.get('PATIENCE'))
107 1 260.0 260.0 0.0 if X_test is not None and y_test is not None:
108 1 62492.0 62492.0 0.0 string += '\n\tTest RMSE: {:.6f}. Early stop test RMSE: {:.6f}'.format( test_loss**0.5, (1-params.get('TOLERANCE'))*best_test_loss**0.5 )
109 1 1370.0 1370.0 0.0 string += '. Counter at {} of {}'.format(count_test, params.get('PATIENCE'))
110 1 174425.0 174425.0 0.0 tqdm.write(string)
111
112 2 1900.0 950.0 0.0 meta_data = dict( losses = losses,
113 1 140.0 140.0 0.0 lrs = lrs,
114 1 110.0 110.0 0.0 test_losses = test_losses)
115 1 210.0 210.0 0.0 return model,meta_data
So line 95 went from order 1e5 to order 5e7 per hit just by commenting out a line above it? Put differently, the roughly 5 s of wall time that the first run attributed to scheduler.step() on line 69 (93.0% of the total) is now attributed to the comparison on line 95 (93.6% of the total). The heck? Any ideas? It seems that no matter which mole I whack, something else pops up at around the 1e7-per-hit order of magnitude.
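In case it's relevant, DeviceDataLoader is just a thin wrapper that moves each batch onto the device as it is yielded. Simplified, it amounts to something like this (a paraphrased sketch, not my exact class):
class DeviceDataLoader:
    # Paraphrased sketch of my wrapper, not the exact code:
    # iterate the underlying DataLoader and push each batch to the target device.
    def __init__(self, loader, device):
        self.loader = loader
        self.device = device

    def __iter__(self):
        for X_batch, y_batch in self.loader:
            yield X_batch.to(self.device), y_batch.to(self.device)

    def __len__(self):
        return len(self.loader)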