禁用和抑制错误#
创建时间:2025 年 7 月 28 日 | 最后更新时间:2025 年 7 月 28 日
对于某些模型架构,模型中存在一些特别难以编译的部分 — 要么存在很多图中断,要么会崩溃。您可能希望显式禁用模型中这些有问题的部分,以便您可以将 torch.compile 应用于可正常工作的部件。您可以通过使用 @torch.compiler.disable 装饰器来实现。当 torch.compile 尝试调用已禁用的函数时,它会中断图并跳过禁用函数的跟踪,在调用之后恢复跟踪。默认情况下,从已禁用函数进行的所有递归调用也都会被禁用。使用 recursive=False 选项可以允许编译递归调用。
def inner1(x):
torch._dynamo.graph_break() # not traced
return x + 1 # not traced
@torch.compiler.disable
def outer1(x):
x = x + 2 # not traced
torch._dynamo.graph_break() # not traced
return inner1(x)
@torch.compile
def f(x):
x = outer1(x)
return x + 4 # traced
print(f(torch.ones(3)))
tensor([8., 8., 8.])
Graph break in user code at /tmp/ipykernel_621/1421264493.py:13
Graph Break Reason: Skip calling `torch.compiler.disable()`d function
Explanation: Skip calling function `<function outer1 at 0x7f1a1831f760>` since it was wrapped with `torch.compiler.disable` (reason: None)
Hint: Remove the `torch.compiler.disable` call
Developer debug context: <function outer1 at 0x7f1a1831f760>
For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0098.html
User code traceback:
File "/opt/conda/envs/py_3.10/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/opt/conda/envs/py_3.10/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance
app.start()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 739, in start
self.io_loop.start()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
File "/opt/conda/envs/py_3.10/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
self._run_once()
File "/opt/conda/envs/py_3.10/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
handle._run()
File "/opt/conda/envs/py_3.10/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 519, in dispatch_queue
await self.process_one()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 508, in process_one
await dispatch(*args)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 400, in dispatch_shell
await result
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 767, in execute_request
reply_content = await reply_content
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 455, in do_execute
res = shell.run_cell(
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3006, in run_cell
result = self._run_cell(
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3061, in _run_cell
result = runner(coro)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
coro.send(None)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3266, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3445, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3505, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "/tmp/ipykernel_621/1421264493.py", line 16, in <module>
print(f(torch.ones(3)))
File "/tmp/ipykernel_621/1421264493.py", line 13, in f
x = outer1(x)
TRACED GRAPH
===== __compiled_fn_4_ddf910cd_9f2d_42e3_8aa0_06652460f2c0 =====
/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/fx/_lazy_graph_module.py class GraphModule(torch.nn.Module):
def forward(self, L_stack0_: "f32[3][1]cpu"):
l_stack0_ = L_stack0_
# File: /tmp/ipykernel_621/1421264493.py:14 in torch_dynamo_resume_in_f_at_13, code: return x + 4 # traced
add: "f32[3][1]cpu" = l_stack0_ + 4; l_stack0_ = None
return (add,)
def inner2(x):
torch._dynamo.graph_break() # traced
return x + 1 # traced
@torch.compiler.disable(recursive=False)
def outer2(x):
x = x + 2 # not traced
torch._dynamo.graph_break() # not traced
return inner2(x)
@torch.compile
def g(x):
x = outer2(x)
return x + 4 # traced
print(g(torch.ones(3)))
tensor([8., 8., 8.])
Graph break in user code at /tmp/ipykernel_621/881423632.py:13
Graph Break Reason: Skip inlining `torch.compiler.disable()`d function
Explanation: Skip inlining function <function outer2 at 0x7f19bb4b2e60> since it was wrapped with `torch.compiler.disable` (reason: None)
Hint: Remove the `torch.compiler.disable` call
Developer debug context: <function outer2 at 0x7f19bb4b2e60>
For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0099.html
User code traceback:
File "/opt/conda/envs/py_3.10/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/opt/conda/envs/py_3.10/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance
app.start()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 739, in start
self.io_loop.start()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
File "/opt/conda/envs/py_3.10/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
self._run_once()
File "/opt/conda/envs/py_3.10/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
handle._run()
File "/opt/conda/envs/py_3.10/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 519, in dispatch_queue
await self.process_one()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 508, in process_one
await dispatch(*args)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 400, in dispatch_shell
await result
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 767, in execute_request
reply_content = await reply_content
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 455, in do_execute
res = shell.run_cell(
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3006, in run_cell
result = self._run_cell(
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3061, in _run_cell
result = runner(coro)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
coro.send(None)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3266, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3445, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3505, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "/tmp/ipykernel_621/881423632.py", line 16, in <module>
print(g(torch.ones(3)))
File "/tmp/ipykernel_621/881423632.py", line 13, in g
x = outer2(x)
Graph break in user code at /tmp/ipykernel_621/881423632.py:2
Graph Break Reason: Call to `torch._dynamo.graph_break()`
Explanation: User-inserted graph break. Message: None
Hint: Remove the `torch._dynamo.graph_break()` call.
Developer debug context: Called `torch._dynamo.graph_break()` with args `[]`, kwargs `{}`
For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0025.html
User code traceback:
File "/opt/conda/envs/py_3.10/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/opt/conda/envs/py_3.10/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance
app.start()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 739, in start
self.io_loop.start()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
File "/opt/conda/envs/py_3.10/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
self._run_once()
File "/opt/conda/envs/py_3.10/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
handle._run()
File "/opt/conda/envs/py_3.10/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 519, in dispatch_queue
await self.process_one()
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 508, in process_one
await dispatch(*args)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 400, in dispatch_shell
await result
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 767, in execute_request
reply_content = await reply_content
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 455, in do_execute
res = shell.run_cell(
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3006, in run_cell
result = self._run_cell(
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3061, in _run_cell
result = runner(coro)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
coro.send(None)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3266, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3445, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3505, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "/tmp/ipykernel_621/881423632.py", line 16, in <module>
print(g(torch.ones(3)))
File "/tmp/ipykernel_621/881423632.py", line 13, in g
x = outer2(x)
File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 196, in nonrecursive_disable_wrapper
return fn(*args, **kwargs)
File "/tmp/ipykernel_621/881423632.py", line 9, in outer2
return inner2(x)
File "/tmp/ipykernel_621/881423632.py", line 2, in inner2
torch._dynamo.graph_break() # traced
TRACED GRAPH
===== __compiled_fn_12_2510e5ed_bdf7_4366_b3df_b14b9b6e6a58 =====
/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/fx/_lazy_graph_module.py class GraphModule(torch.nn.Module):
def forward(self, L_x_: "f32[3][1]cpu"):
l_x_ = L_x_
# File: /tmp/ipykernel_621/881423632.py:3 in torch_dynamo_resume_in_inner2_at_2, code: return x + 1 # traced
add: "f32[3][1]cpu" = l_x_ + 1; l_x_ = None
return (add,)
TRACED GRAPH
===== __compiled_fn_14_ac8ce4a2_b2c9_4625_b2ad_2d36006355a5 =====
/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/fx/_lazy_graph_module.py class GraphModule(torch.nn.Module):
def forward(self, L_stack0_: "f32[3][1]cpu"):
l_stack0_ = L_stack0_
# File: /tmp/ipykernel_621/881423632.py:14 in torch_dynamo_resume_in_g_at_13, code: return x + 4 # traced
add: "f32[3][1]cpu" = l_stack0_ + 4; l_stack0_ = None
return (add,)
例如,可以使用 torch.compiler.disable 来禁用推荐模型中稀疏架构上的 torch.compile,因为稀疏架构难以编译。预处理和日志记录函数是其他常见的会导致大量图中断且从编译中获益不多的函数的例子。
如果您遇到编译器崩溃并且希望继续进行,可以将 torch._dynamo.config.suppress_errors = True 设置为 True。当编译器崩溃时,我们将跳过该函数的跟踪,稍后重试。这不是最佳实践 — 最好最终根据需要手动添加 disable 注释。