Hello,
I upgraded PyTorch to 2.1, and there seem to be issues when I run it on a GPU on the Slurm cluster I use. The traceback is below:
File "/storage/home/hcoda1/6/user123/VIT/model_reg_square.py", line 292, in <module>
y_pred = model(x_masked, attn_mask)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 328, in _fn
return fn(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 490, in catch_errors
return callback(frame, cache_entry, hooks, frame_state)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 641, in _convert_frame
result = inner_convert(frame, cache_size, hooks, frame_state)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 133, in _fn
return fn(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 389, in _convert_frame_assert
return _compile(
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 569, in _compile
guarded_code = compile_inner(code, one_graph, hooks, transform)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 189, in time_wrapper
r = func(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 491, in compile_inner
out_code = transform_code_object(code, transform)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/bytecode_transformation.py", line 1028, in transform_code_object
transformations(instructions, code_options)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 458, in transform
tracer.run()
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 2069, in run
super().run()
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 719, in run
and self.step()
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 683, in step
getattr(self, inst.opname)(inst)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 2157, in RETURN_VALUE
self.output.compile_subgraph(
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 833, in compile_subgraph
self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 957, in compile_and_call_fx_graph
compiled_fn = self.call_user_compiler(gm)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 189, in time_wrapper
r = func(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 1024, in call_user_compiler
raise BackendCompilerFailed(self.compiler_fn, e).with_traceback(
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 1009, in call_user_compiler
compiled_fn = compiler_fn(gm, self.example_inputs())
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/repro/after_dynamo.py", line 117, in debug_wrapper
compiled_gm = compiler_fn(gm, example_inputs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/__init__.py", line 1568, in __call__
return compile_fx(model_, inputs_, config_patches=self.config)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 1150, in compile_fx
return aot_autograd(
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/backends/common.py", line 55, in compiler_fn
cg = aot_module_simplified(gm, example_inputs, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 3891, in aot_module_simplified
compiled_fn = create_aot_dispatcher_function(
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 189, in time_wrapper
r = func(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 3429, in create_aot_dispatcher_function
compiled_fn = compiler_fn(flat_fn, fake_flat_args, aot_config, fw_metadata=fw_metadata)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 2212, in aot_wrapper_dedupe
return compiler_fn(flat_fn, leaf_flat_args, aot_config, fw_metadata=fw_metadata)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 2392, in aot_wrapper_synthetic_base
return compiler_fn(flat_fn, flat_args, aot_config, fw_metadata=fw_metadata)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 2917, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 189, in time_wrapper
r = func(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 1092, in fw_compiler_base
return inner_compile(
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/repro/after_aot.py", line 80, in debug_wrapper
inner_compiled_fn = compiler_fn(gm, example_inputs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_inductor/debug.py", line 228, in inner
return fn(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 54, in newFunction
return old_func(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 341, in compile_fx_inner
compiled_graph: CompiledFxGraph = fx_codegen_and_compile(
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 565, in fx_codegen_and_compile
compiled_fn = graph.compile_to_fn()
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_inductor/graph.py", line 970, in compile_to_fn
return self.compile_to_module().call
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 189, in time_wrapper
r = func(*args, **kwargs)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_inductor/graph.py", line 941, in compile_to_module
mod = PyCodeCache.load_by_key_path(key, path, linemap=linemap)
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 1139, in load_by_key_path
exec(code, mod.__dict__, mod.__dict__)
File "/scratch/4152356/torchinductor_user123/qz/cqzkkur7b6tqcm6ybza4x3syp4mkqpyaemiydeztjwogbypaudam.py", line 1070, in <module>
async_compile.wait(globals())
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 1418, in wait
scope[key] = result.result()
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 1277, in result
self.future.result()
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/concurrent/futures/_base.py", line 458, in result
return self.__get_result()
File "/storage/home/hcoda1/6/user123/.conda/envs/myenv/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
torch._dynamo.exc.BackendCompilerFailed: backend='inductor' raised:
PermissionError: [Errno 13] Permission denied: 'ldconfig'
If I try to run `ldconfig` myself, the cluster reports "command not found". How do I move forward?