I am trying to run the code below in a Jupyter notebook in my environment:
#---------------------------------------------------
import torch
import torchvision
torch.set_float32_matmul_precision('high')
m = torchvision.models.resnet50().cuda()
mm = torch.compile(m)
data = torch.rand(1, 3, 224, 224).cuda()
o = mm(data)
#---------------------------------------------------
But I got the following error message:
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/beck/miniconda3/envs/pytorch2/lib/python3.10/concurrent/futures/process.py", line 246, in _process_worker
r = call_item.fn(*call_item.args, **call_item.kwargs)
File "/home/beck/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 525, in _worker_compile
kernel.precompile(warm_cache_only_with_cc=cc)
File "/home/beck/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/triton_ops/autotune.py", line 67, in precompile
self.launchers = [
File "/home/beck/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/triton_ops/autotune.py", line 68, in <listcomp>
self._precompile_config(c, warm_cache_only_with_cc)
File "/home/beck/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/triton_ops/autotune.py", line 81, in _precompile_config
triton.compile(
File "/home/beck/miniconda3/envs/pytorch2/lib/python3.10/site-packages/triton/compiler.py", line 1256, in compile
asm, shared, kernel_name = _compile(fn, signature, device, constants, configs[0], num_warps, num_stages,
File "/home/beck/miniconda3/envs/pytorch2/lib/python3.10/site-packages/triton/compiler.py", line 901, in _compile
name, asm, shared_mem = _triton.code_gen.compile_ttir(backend, module, device, num_warps, num_stages, extern_libs, cc)
RuntimeError: Internal Triton PTX codegen error:
ptxas /tmp/fileC5RW8H, line 6; error : PTX .version 7.4 does not support .target sm_89
ptxas fatal : Ptx assembly aborted due to errors
"""
The above exception was the direct cause of the following exception:
The full traceback:
RuntimeError Traceback (most recent call last)
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/output_graph.py:692, in OutputGraph.call_user_compiler(self, gm)
691 else:
--> 692 compiled_fn = compiler_fn(gm, self.fake_example_inputs())
693 _step_logger()(logging.INFO, f"done compiler function {name}")
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/debug_utils.py:1054, in wrap_backend_debug.<locals>.debug_wrapper(gm, example_inputs, **kwargs)
1053 else:
--> 1054 compiled_gm = compiler_fn(gm, example_inputs)
1056 return compiled_gm
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/__init__.py:1368, in TorchCompileInductorWrapper.__call__(self, model_, inputs_)
1367 def __call__(self, model_, inputs_):
--> 1368 return self.compile_fn(model_, inputs_, config_patches=self.config)
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/compile_fx.py:426, in compile_fx(model_, example_inputs_, inner_compile, config_patches)
421 with overrides.patch_functions():
422
423 # TODO: can add logging before/after the call to create_aot_dispatcher_function
424 # in torch._functorch/aot_autograd.py::aot_module_simplified::aot_function_simplified::new_func
425 # once torchdynamo is merged into pytorch
--> 426 return aot_autograd(
427 fw_compiler=fw_compiler,
428 bw_compiler=bw_compiler,
429 decompositions=select_decomp_table(),
430 partition_fn=functools.partial(
431 min_cut_rematerialization_partition, compiler="inductor"
432 ),
433 )(model_, example_inputs_)
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/optimizations/training.py:66, in aot_autograd.<locals>.compiler_fn(gm, example_inputs)
65 with enable_aot_logging():
--> 66 cg = aot_module_simplified(gm, example_inputs, **kwargs)
67 counters["aot_autograd"]["ok"] += 1
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py:2483, in aot_module_simplified(mod, args, fw_compiler, bw_compiler, partition_fn, decompositions, hasher_type, static_argnums)
2481 full_args.extend(args)
--> 2483 compiled_fn = create_aot_dispatcher_function(
2484 functional_call,
2485 full_args,
2486 aot_config,
2487 )
2489 # TODO: There is something deeply wrong here; compiled_fn running with
2490 # the boxed calling convention, but aot_module_simplified somehow
2491 # historically returned a function that was not the boxed calling
2492 # convention. This should get fixed...
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/utils.py:162, in dynamo_timed.<locals>.dynamo_timed_inner.<locals>.time_wrapper(*args, **kwargs)
161 t0 = time.time()
--> 162 r = func(*args, **kwargs)
163 time_spent = time.time() - t0
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py:2180, in create_aot_dispatcher_function(flat_fn, flat_args, aot_config)
2178 # You can put more passes here
--> 2180 compiled_fn = compiler_fn(flat_fn, fake_flat_args, aot_config)
2182 if not hasattr(compiled_fn, "_boxed_call"):
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py:1411, in aot_wrapper_dedupe(flat_fn, flat_args, aot_config, compiler_fn)
1410 if ok:
--> 1411 return compiler_fn(flat_fn, leaf_flat_args, aot_config)
1413 # Strategy 2: Duplicate specialize.
1414 #
1415 # In Haskell types, suppose you have:
(...)
1447 # }
1448 # keep_arg_mask = [True, True, False, True]
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py:1688, in aot_dispatch_autograd(flat_fn, flat_args, aot_config)
1687 with track_graph_compiling(aot_config, "forward"):
--> 1688 compiled_fw_func = aot_config.fw_compiler(
1689 fw_module, flat_args_with_views_handled
1690 )
1692 class CompiledFunction(torch.autograd.Function):
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/utils.py:162, in dynamo_timed.<locals>.dynamo_timed_inner.<locals>.time_wrapper(*args, **kwargs)
161 t0 = time.time()
--> 162 r = func(*args, **kwargs)
163 time_spent = time.time() - t0
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/compile_fx.py:401, in compile_fx.<locals>.fw_compiler(model, example_inputs)
400 fixed = len(example_inputs) - num_example_inputs
--> 401 return inner_compile(
402 model,
403 example_inputs,
404 num_fixed=fixed,
405 cudagraphs=cudagraphs,
406 graph_id=graph_id,
407 )
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/debug_utils.py:594, in wrap_compiler_debug.<locals>.debug_wrapper(gm, example_inputs, **kwargs)
593 else:
--> 594 compiled_fn = compiler_fn(gm, example_inputs)
596 return compiled_fn
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/debug.py:239, in DebugContext.wrap.<locals>.inner(*args, **kwargs)
238 with DebugContext():
--> 239 return fn(*args, **kwargs)
File ~/miniconda3/envs/pytorch2/lib/python3.10/contextlib.py:79, in ContextDecorator.__call__.<locals>.inner(*args, **kwds)
78 with self._recreate_cm():
--> 79 return func(*args, **kwds)
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/compile_fx.py:155, in compile_fx_inner(gm, example_inputs, cudagraphs, num_fixed, is_backward, graph_id)
154 graph.run(*example_inputs)
--> 155 compiled_fn = graph.compile_to_fn()
157 if cudagraphs:
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/graph.py:570, in GraphLowering.compile_to_fn(self)
569 def compile_to_fn(self):
--> 570 return self.compile_to_module().call
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/utils.py:162, in dynamo_timed.<locals>.dynamo_timed_inner.<locals>.time_wrapper(*args, **kwargs)
161 t0 = time.time()
--> 162 r = func(*args, **kwargs)
163 time_spent = time.time() - t0
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/graph.py:559, in GraphLowering.compile_to_module(self)
557 print(code)
--> 559 mod = PyCodeCache.load(code)
560 for name, value in self.constants.items():
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/codecache.py:504, in PyCodeCache.load(cls, source_code)
503 mod.key = key
--> 504 exec(code, mod.__dict__, mod.__dict__)
505 # another thread might set this first
File /tmp/torchinductor_beck/vc/cvcdtwwo2tcoz6j2i46wspset3tq3xzax6hyw247h6bpr664ay5o.py:2032
2009 triton__23 = async_compile.triton('''
2010 import triton
2011 import triton.language as tl
(...)
2028 tl.store(out_ptr0 + (0 + tl.zeros([XBLOCK], tl.int32)), tmp2, None)
2029 ''')
--> 2032 async_compile.wait(globals())
2033 del async_compile
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/codecache.py:691, in AsyncCompile.wait(self, scope)
690 if isinstance(result, (Future, TritonFuture)):
--> 691 scope[key] = result.result()
692 pbar.update(1)
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_inductor/codecache.py:549, in TritonFuture.result(self)
548 # If the worker failed this will throw an exception.
--> 549 self.future.result()
550 kernel = self.kernel = _load_kernel(self.source_code)
File ~/miniconda3/envs/pytorch2/lib/python3.10/concurrent/futures/_base.py:458, in Future.result(self, timeout)
457 elif self._state == FINISHED:
--> 458 return self.__get_result()
459 else:
File ~/miniconda3/envs/pytorch2/lib/python3.10/concurrent/futures/_base.py:403, in Future.__get_result(self)
402 try:
--> 403 raise self._exception
404 finally:
405 # Break a reference cycle with the exception in self._exception
RuntimeError: Internal Triton PTX codegen error:
ptxas /tmp/fileC5RW8H, line 6; error : PTX .version 7.4 does not support .target sm_89
ptxas fatal : Ptx assembly aborted due to errors
The above exception was the direct cause of the following exception:
BackendCompilerFailed Traceback (most recent call last)
Cell In[2], line 8
5 mm = torch.compile(m)
7 data = torch.rand(1, 3, 224, 224).cuda()
----> 8 o = mm(data)
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
--> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py:82, in OptimizedModule.forward(self, *args, **kwargs)
81 def forward(self, *args, **kwargs):
--> 82 return self.dynamo_ctx(self._orig_mod.forward)(*args, **kwargs)
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py:209, in _TorchDynamoContext.__call__.<locals>._fn(*args, **kwargs)
207 dynamic_ctx.__enter__()
208 try:
--> 209 return fn(*args, **kwargs)
210 finally:
211 set_eval_frame(prior)
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py:330, in catch_errors_wrapper.<locals>.catch_errors(frame, cache_size)
327 return hijacked_callback(frame, cache_size, hooks)
329 with compile_lock:
--> 330 return callback(frame, cache_size, hooks)
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py:403, in convert_frame.<locals>._convert_frame(frame, cache_size, hooks)
401 counters["frames"]["total"] += 1
402 try:
--> 403 result = inner_convert(frame, cache_size, hooks)
404 counters["frames"]["ok"] += 1
405 return result
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py:103, in wrap_convert_context.<locals>._fn(*args, **kwargs)
101 torch.fx.graph_module._forward_from_src = fx_forward_from_src_skip_result
102 try:
--> 103 return fn(*args, **kwargs)
104 finally:
105 torch._C._set_grad_enabled(prior_grad_mode)
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py:261, in convert_frame_assert.<locals>._convert_frame_assert(frame, cache_size, hooks)
258 global initial_grad_state
259 initial_grad_state = torch.is_grad_enabled()
--> 261 return _compile(
262 frame.f_code,
263 frame.f_globals,
264 frame.f_locals,
265 frame.f_builtins,
266 compiler_fn,
267 one_graph,
268 export,
269 hooks,
270 frame,
271 )
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/utils.py:162, in dynamo_timed.<locals>.dynamo_timed_inner.<locals>.time_wrapper(*args, **kwargs)
160 compilation_metrics[key] = []
161 t0 = time.time()
--> 162 r = func(*args, **kwargs)
163 time_spent = time.time() - t0
164 # print(f"Dynamo timer: key={key}, latency={latency:.2f} sec")
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py:323, in _compile(code, globals, locals, builtins, compiler_fn, one_graph, export, hooks, frame)
321 for attempt in itertools.count():
322 try:
--> 323 out_code = transform_code_object(code, transform)
324 orig_code_map[out_code] = code
325 break
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/bytecode_transformation.py:339, in transform_code_object(code, transformations, safe)
336 instructions = cleaned_instructions(code, safe)
337 propagate_line_nums(instructions)
--> 339 transformations(instructions, code_options)
341 fix_vars(instructions, code_options)
343 dirty = True
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py:310, in _compile.<locals>.transform(instructions, code_options)
297 nonlocal output
298 tracer = InstructionTranslator(
299 instructions,
300 code,
(...)
308 mutated_closure_cell_contents,
309 )
--> 310 tracer.run()
311 output = tracer.output
312 assert output is not None
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py:1715, in InstructionTranslator.run(self)
1713 def run(self):
1714 _step_logger()(logging.INFO, f"torchdynamo start tracing {self.f_code.co_name}")
--> 1715 super().run()
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py:564, in InstructionTranslatorBase.run(self)
559 try:
560 self.output.push_tx(self)
561 while (
562 self.instruction_pointer is not None
563 and not self.output.should_exit
--> 564 and self.step()
565 ):
566 pass
567 except BackendCompilerFailed:
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py:527, in InstructionTranslatorBase.step(self)
525 if not hasattr(self, inst.opname):
526 unimplemented(f"missing: {inst.opname}")
--> 527 getattr(self, inst.opname)(inst)
529 return inst.opname != "RETURN_VALUE"
530 except BackendCompilerFailed:
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py:1781, in InstructionTranslator.RETURN_VALUE(self, inst)
1776 _step_logger()(
1777 logging.INFO,
1778 f"torchdynamo done tracing {self.f_code.co_name} (RETURN_VALUE)",
1779 )
1780 log.debug("RETURN_VALUE triggered compile")
--> 1781 self.output.compile_subgraph(self)
1782 self.output.add_output_instructions([create_instruction("RETURN_VALUE")])
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/output_graph.py:539, in OutputGraph.compile_subgraph(self, tx, partial_convert, reason)
525 self.add_output_instructions(random_calls_instructions)
527 if (
528 stack_values
529 and all(
(...)
536
537 # optimization to generate better code in a common case
538 self.add_output_instructions(
--> 539 self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
540 + [create_instruction("UNPACK_SEQUENCE", len(stack_values))]
541 )
542 else:
543 graph_output_var = self.new_var("graph_out")
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/output_graph.py:610, in OutputGraph.compile_and_call_fx_graph(self, tx, rv, root)
608 assert_no_fake_params_or_buffers(gm)
609 with tracing(self.tracing_context):
--> 610 compiled_fn = self.call_user_compiler(gm)
611 compiled_fn = disable(compiled_fn)
613 counters["stats"]["unique_graphs"] += 1
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/utils.py:162, in dynamo_timed.<locals>.dynamo_timed_inner.<locals>.time_wrapper(*args, **kwargs)
160 compilation_metrics[key] = []
161 t0 = time.time()
--> 162 r = func(*args, **kwargs)
163 time_spent = time.time() - t0
164 # print(f"Dynamo timer: key={key}, latency={latency:.2f} sec")
File ~/miniconda3/envs/pytorch2/lib/python3.10/site-packages/torch/_dynamo/output_graph.py:697, in OutputGraph.call_user_compiler(self, gm)
695 except Exception as e:
696 compiled_fn = gm.forward
--> 697 raise BackendCompilerFailed(self.compiler_fn, e) from e
698 return compiled_fn
BackendCompilerFailed: debug_wrapper raised RuntimeError: Internal Triton PTX codegen error:
ptxas /tmp/fileC5RW8H, line 6; error : PTX .version 7.4 does not support .target sm_89
ptxas fatal : Ptx assembly aborted due to errors
Set torch._dynamo.config.verbose=True for more information
You can suppress this exception and fall back to eager by setting:
torch._dynamo.config.suppress_errors = True
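Following the hints at the end of the message, I can at least get the cell to run by letting Dynamo fall back to eager mode, but that only hides the compile failure rather than fixing it. A minimal sketch of the suggested settings:
#---------------------------------------------------
import torch._dynamo
torch._dynamo.config.verbose = True          # print more detail about the failing compile
torch._dynamo.config.suppress_errors = True  # swallow backend errors, fall back to eager
o = mm(data)                                 # runs, but without compiled Triton kernels
#---------------------------------------------------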
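For reference, here is a quick way to check the pieces involved. If I understand the error correctly, sm_89 is the compute capability of an Ada-generation GPU (e.g. an RTX 4090), while PTX .version 7.4 corresponds to a CUDA 11.4-era ptxas, which predates sm_89 support:
#---------------------------------------------------
import torch
print(torch.__version__)                    # PyTorch build
print(torch.version.cuda)                   # CUDA toolkit this build was compiled against
print(torch.cuda.get_device_capability(0))  # (8, 9) would confirm an sm_89 GPU
#---------------------------------------------------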