Hi, I’m trying to run two small models on the same GPU in parallel, but the result is not what I expected: they run slower than the sequential version.
I’ve tried adjusting the batch size and the number of iterations, but the parallel version is always slower. Am I missing something?
The code:
from torchvision.models import resnet18, ResNet18_Weights, resnet34,ResNet34_Weights
import torch
import time
import torch.multiprocessing as mp
def model18():
    """Build an ImageNet-pretrained ResNet-18 on the GPU in eval mode."""
    net = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    return net.cuda().eval()
def model34():
    """Build an ImageNet-pretrained ResNet-34 on the GPU in eval mode."""
    net = resnet34(weights=ResNet34_Weights.IMAGENET1K_V1)
    return net.cuda().eval()
def infer(model, num_iters=1000, batch_size=32):
    """Run repeated forward passes of *model* on a zero-filled dummy batch.

    Prints a progress marker every 100 iterations and the final output
    shape when done.

    Args:
        model: a CUDA-resident torch module already in eval mode.
        num_iters: number of forward passes (default 1000, as in the
            original benchmark).
        batch_size: batch size of the dummy (batch, 3, 224, 224) input
            (default 32, as in the original benchmark).
    """
    data = torch.zeros(batch_size, 3, 224, 224).cuda()
    output = None
    # Inference only: model.eval() alone does NOT disable autograd, so
    # without no_grad() every forward pass builds a graph, costing GPU
    # memory and time.
    with torch.no_grad():
        for i in range(num_iters):
            # .cpu() forces a device-to-host sync on every iteration;
            # no detach() needed inside no_grad().
            output = model(data).cpu().numpy()
            if i % 100 == 0:
                print(i, end=" ")
    print(f"Done {output.shape}")
if __name__ == "__main__":
    # "spawn" is required to hand CUDA models to child processes.
    mp.set_start_method("spawn")
    m1 = model18()
    m2 = model34()
    time.sleep(1)
    p1 = mp.Process(target=infer, args=(m1,))
    p2 = mp.Process(target=infer, args=(m2,))
    print("Inited")

    print("sequential")
    seq_start = time.time()
    infer(m1)
    print(time.time() - seq_start)  # elapsed after the first model
    infer(m2)
    print(time.time() - seq_start)  # cumulative elapsed after both models
    print("Done sequential.....................")
    print()

    print("Parallel")
    par_start = time.time()
    # NOTE(review): both children submit kernels to the SAME physical GPU,
    # so their work is largely serialized by the device, and each spawned
    # process pays CUDA-context/model-transfer overhead on top — this is
    # presumably why the parallel run comes out slower; verify with
    # profiling (e.g. separate GPUs, CUDA MPS, or streams for true overlap).
    p1.start()
    p2.start()
    print("Started:", time.time() - par_start)
    p1.join()
    p2.join()
    print(time.time() - par_start)
    print("Done parallel.....................")
The result in the console:
Inited
sequential
0 100 200 300 400 500 600 700 800 900 Done (32, 1000)
51.6720654964447
0 100 200 300 400 500 600 700 800 900 Done (32, 1000)
156.92688918113708
Done sequential.....................
Parallel
0.5700843334197998
0 0 100 100 200 300 200 400 500 300 600 700 400 800 500 900 Done (32, 1000)
600 700 800 900 Done (32, 1000)
185.4344515800476
Done parallel.....................