
    pi                      d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZm Z m!Z! d dl"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( d dl)m*Z* d dl+Z,d dl-Z,d dl.m/c m0Z1 d dl2m3Z3 d dl,m4Z4 d dl5m6Z6 d dl7m8Z8m9Z:m	Z;m/Z< d dl=m>Z> d dl?m@Z@ d dlAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJ d dlKm9ZL d dlMmNZN d dlOmPZPmQZQmRZRmSZS d dlTmUZUmVZVmWZW d dlXmYZYmZZZm[Z[m\Z\ d dl]m^Z^ d dl_m`Z`maZa d dlbmcZcmdZdmeZemfZfmgZgmhZh d dlimjZj d dlkmlZlmmZmmnZnmoZompZpmqZqmrZrmsZsmtZt d dlumvZv d dlwmxZx d d lymzZz d d!l-m{Z{ d d"l|m}Z}m~Z~ d d#lmZ d d$lmZ d d%lmZ d&d'lmZ d&d(lmZmZ d&d)lmZ d&d*lmZ d&d+lmZ d,d-lm9Z9mZ d,d.lmZmZ d,d/lmZ d,d0lmZ d,d1lmZ d,d2lmZ d,d3lmZmZ d,d4lmZ d,d5lmZ d,d6lmZmZ d,d7lmZ d,d8lmZ d,d9l/mZmZmZmZmZmZmZmZmZ d,d:lmZ er d d;lmZmZ d d<lbmZ d d=lmZ d d>lmZ d,d?lmZ  e%d@      Z e dA      Zes e9j                         s	ddBZddCZnd dDlmZmZ erd dlZd dElmZmZmZ  G dF dGej                        Ze G dH dI             ZddJZddKZ eͫ       Zeϐj                  Zeϐj                  Zeϐj                  Z e	j                  e׫      Ze,j                  j                  edL      Ze,j                  j                  edM      Ze,j                  j                  edN      Ze,j                  j                  edO      Ze,j                  j                  edP      ZddQZddRZddSZddTZ ej                  d      ddU       Zej                  ddV       ZddWZ	 	 	 	 	 	 	 	 ddXZ	 d	 	 	 	 	 ddZZ	 	 	 	 	 	 dd[Z	 d	 	 	 	 	 dd\Zddd]Z	 	 	 d	 	 	 	 	 	 	 	 	 dd_Zdd`Z	 	 	 	 ddaZ	 	 	 	 	 	 ddbZ	 d	 	 	 	 	 	 	 ddcZ	 d	 	 	 dddZej                  dde       Z G df dge'dYh      Z G di dje&      Z	 	 	 	 	 	 	 	 ddkZ edlm      	 	 	 	 	 	 	 	 ddn       Z G do dp      Z G dq dre      Z G ds dte      Z	 	 	 	 	 	 	 	 	 	 dduZ	 	 	 	 	 	 ddvZ	 ddwdwdwdx	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddyZddzZ 	 	 	 	 	 	 	 	 dd{Z	 d	 	 	 	 	 	 	 dd|Zedf	 	 	 	 	 	 	 	 	 dd}Z ed       Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd~ZddZddZ	 	 	 	 	 	 	 	 ddZddZ	 ed^       G d d             Z
ddZedYf	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZef	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 ddZedddYf	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 ddZddZ	 ddd	 	 	 	 	 	 	 	 	 ddZy)    )annotationsN)ABCabstractmethod)defaultdict)AbstractContextManager)	dataclass)currentframe)count)
attrgetter)AnyCallableOptionalTYPE_CHECKINGTypeVarUnion)Neveroverride	ParamSpecProtocol	TypedDictUnpack)mock)#min_cut_rematerialization_partition)fx)enable_python_dispatcher)compiled_autogradconfigloggingutils)get_interface_for_device)wrap_compiler_debug)	chromium_event_timedCompileEventLoggercountersdetect_fake_modedynamo_timedflatten_graph_inputsget_metrics_contextlazy_format_graph_codeset_feature_use)r   )!unwrap_tensor_subclass_parameters)aot_export_moduleGraphOutputNamemake_boxed_funcSerializableAOTDispatchCompiler)	code_hashFxGraphCacheoutput_code_log)BoxedDeviceIndexformat_default_skip_message#log_cudagraph_skip_and_bump_counterPlaceholderInfo)CustomPartitionerFn)"create_mapping_pre_post_grad_nodessave_args_for_compile_fx_inner)CompiledAOTICompiledFxGraphCompiledFxGraphConstantsWithGmget_expanded_dimsindex_expanded_dims
OutputCode)	cache_dir)		BoxedBoolcount_tangentsfresh_cacheget_all_devices	InputTypeis_gpushould_assume_input_aligned should_use_remote_fx_graph_cachetensor_is_aligned)FakeScriptObject)trace_structured)compile_time_strobelight_meta)GraphModule)free_unbacked_symbolsSymExprPrinter)FakeTensorProp)_WaitCounter)
OrderedSet   )aot_autograd)ShortenTraceback	SkipFrame)_use_lazy_graph_module)_PyTreeCodeGen)
has_triton   )r   metrics)get_wrapper_codegen_for_deviceinit_backend_registration)DebugContext)select_decomp_table)InductorError)joint_graph_passes)post_grad_passesview_to_reshape)pre_grad_passes)GraphLowering)get_device_typeIRNode)complex_memory_overlap)TritonBundler)	align_inputs_from_check_idxsclone_preserve_stridescopy_misaligned_inputs get_cloned_parameter_buffer_name%get_first_incompatible_cudagraph_node#maybe_get_suppress_shape_guards_ctxoutput_noderemove_unaligned_input_idxsshape_env_from_inputs)V)	GeneratorSequence)_StrideExprStr)
OpOverload)Weights)ExternKernelNode_P_Tc                "    t         j                  S N)dynamo_utilsidentityattrs    \/opt/services/ai/voice_agent/venv/lib/python3.12/site-packages/torch/_inductor/compile_fx.pytime_and_logr      s    $$$    c                      y r}    )argskwargss     r   log_optimus_to_scubar      s    r   )r   r   )FQNGraphInputNameGraphSignaturec                      e Zd ZdZdZdZy)FxCompileModer   rZ   rS   N)__name__
__module____qualname__NORMAL	SERIALIZE
SUBPROCESSr   r   r   r   r      s    F IJr   r   c                  ,    e Zd ZU ded<   ded<   ded<   y)FxCompileConfigr   modebool	use_asyncuse_progressiveNr   r   r   __annotations__r   r   r   r   r      s    
Or   r   c                    d} t         j                  j                  |       }|t        t        j
                  dd      S d}d}|j                         j                  d      rd}|dd  }|j                         j                  d      rd}|dd  }	 |j                         }t        t        |   ||      S # t        $ r dd l
} |j                  t              }|j                  d	|| d
j                  t        d t        j                   j#                         D                           t         j                  j%                  |        t        t        j
                  dd      cY S w xY w)NTORCHINDUCTOR_FX_COMPILE_MODEFzprogressive+T   zasync+   r   z>Invalid value of %s for %s. Expected one of %s. Using default.z, c              3  2   K   | ]  }t        |        y wr}   )repr.0xs     r   	<genexpr>z+_fx_compile_mode_default.<locals>.<genexpr>   s     OT!WOs   )osenvirongetr   r   r   lower
startswithupperKeyErrorr   	getLoggerr   errorjoinsorted__members__keyspop)namevaluer   r   r   logs         r   _fx_compile_mode_defaultr      s2   *DJJNN4 E}}33UEBBIO{{}/bc
{{})	ab	C}U3YPP Cg)		LIIfOm.G.G.L.L.NOOP		
 	

t}33UEBBCs   #B4 4B,E#"E#c                     ddigS )Nmax_autotuneTr   r   r   r   _get_progression_configsr      s     
 r   
perf_hintspre_grad_graphspost_grad_graphscudagraph_static_inputsinductor_metricsc                    t         j                  j                  j                         }t	        t        |             }|r|j                  s|S |j                  j                  S r}   )torch_guardsTracingContexttry_getlistrangefw_metadatastatic_input_indices)	num_fixedcontextfixeds      r   get_static_input_idxsr      sM    
 mm**224Gy!"E'--333r   c                $   | j                   j                  d      d   }g }t        |j                  d   t        j
                  j                        s|j                  d   }n|j                  }|D ]  }t        |t        j
                  j                        rW|j                  j                  d      x}:t        |t        j                        r |j                  |j                                ~|j                  d         ||j                  d<   y )Noutputopr   valoriginal_output_strides)graph
find_nodes
isinstancer   r   r   Nodemetar   Tensorappendstride)gmrp   output_stridesoutput_node_argsr   r   s         r   record_original_output_stridesr      s    ((%%%215KNk&&q)588==9&++A.&++" (vuxx}}-..;3-!!#**,/!!$'( 3AK./r   c                    | j                   j                  dt        j                  j                  j
                        D ]0  }t        | |j                  d   j                        }t        |       2 t        |        y )Ncall_functionr   targetr   )r   r   r   opshigher_orderinvoke_subgraphgetattrr   r   )_recursive_record_original_output_stridesr   )r   nodesubgraphs      r   r   r     sh    ##599#9#9#I#I $  < 2tyy|2231(;	< #2&r   c           	        | j                   j                  dt        j                  j                  j
                        D ]  }t        | |j                  d   j                        }|j                   j                  d      D ]r  }t        t        |j                  d               D cg c]8  }t        |j                  d   |   t        j                  j                        r|: c}|j                  d<   t t        |        y c c}w )Nr   r   r   r   r   user_visible_output_idxs)r   r   r   r   r   r   r   r   r   r   lenr   r   r   r   *_recursive_record_user_visible_output_idxs)r   r   r   idxs       r   r   r     s    ##599#9#9#I#I $  = 2tyy|223NN---: 	D !TYYq\!235diil3/? 5DII01	 	38<=5s   (=Dc                 4    t        j                  t              S r}   )dynamo_loggingget_step_loggerr   r   r   r   _step_loggerr   ,  s    ))#..r   c                    t         j                  j                         rgt         j                  j                  j                  j
                  s8t         j                  j                         dk\  rt        j                  d       y y y y )N)   r   zTensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.)	r   cudais_availablebackendsmatmul
allow_tf32get_device_capabilitywarningswarnr   r   r   _warn_tf32_disabledr   1  sc     	

!##**55JJ,,.&8d	
 9 6 	"r   c           
        t        | j                  d      D cg c]  \  }}|	 c}}      j                  t        | j                  d      D cg c]  \  }}|	 c}}             dfd}|j                  j
                  D ]-  }|j                  dk(  s|j                  }|j                  d      s|j                  d      sCt        | |      sP t        |      |      } t        |      |       }t        |t              r)t        |t              rI|j                  |j                  u r1t        j                  ||      r|j                   |j                   k(  r|j                  d      rdnd}	 ||j                  |	      }
|	 |
 }||_        t#        |||       j%                  |       0 yc c}}w c c}}w )	a  
    In aot_export_module (make_fx), we create get_attr nodes with name prefix
    "_tensor_constant" and "_torchbind_obj". See Tracer.create_arg() in
    torch/fx/_symbolic_trace.py

    However, this might result in name collision if the original mod already
    has a different buffer with the same name.

    We resolve this potential name collision here by changing the target name
    with a new number post fix.
    Fremove_duplicatec                .   d}| j                   D ]  }|j                  dk(  s|j                  j                  |      s/t	        |j                        t	        |      kD  sQ|j                  j                  |      d   }|j                         st        |t        |            } D ]f  }|j                  |      st	        |      t	        |      kD  s-|j                  |      d   }|j                         sRt        |t        |            }h |dz   S )Nr   get_attrrZ   )	nodesr   r   r   r   splitisdigitmaxint)r   prefixir   post_fixkeyexisting_keyss         r   find_smallest_iz0_resolve_name_collision.<locals>.find_smallest_iR  s    KK 	2Dww*$)?)?)Gt{{#c&k1#{{008<H'')3x=1	2 ! 	2C~~f%s8c&k)"yy04H'')3x=1	2 1ur   r  _tensor_constant_torchbind_objN)r   zfx.Graphr
  strreturnr	  )rR   named_parametersupdatenamed_buffersr   r  r   r   r   hasattrr   r   rJ   real_objr   equaldtypesetattradd)modr   r   r   r  r   target_name	gm_targetmodel_targetr
  new_idnew_target_namer  s               @r   _resolve_name_collisionr#  >  s    "33U3KL)$LM #*;*;U*;*STYT3DTU   !/77j ++K))"!,,-=>3,/
;/3I2:k237L)%56|-=>!**l.C.CCI|4OO|'9'99 ))*<= #% 
 %RXXv6F!'1O)DKB3o.C!/- 	M Us   F;
Gc                   ddl m}m} t        | |       i }| j	                  d      D ]   \  }}|||<    |||||j
                         " | j                  d      D ]   \  }}|||<    |||||j                         " |j                  j                  d      }	g }
|	D ]  }|j                  }||j                  v r!|j                  |   }|
j                  |       >||j                  v rE|j                  |   }|
j                  |       t        ||         |j                  t!        |      <   ||j"                  v sJ |
j                  d         ddlm} t)        |j                  j+                         j,                  d         }g }|j.                  }|j0                  }|j2                  }t5        |      D ]f  \  }}d }|t7        |      t7        |      z   t7        |      z   k  r(t9        |j                        }||v r||   }n	||v r||   }|j                  |       h  |||
|t;        j<                         d       }|S )	Nr   )_assign_attr	_AttrKindFr   )	attr_kindplaceholderr   )_unlift)torch.export.unflattenr%  r&  r#  r  	PARAMETERr  BUFFERr   r   r   inputs_to_parametersr   inputs_to_buffersrk   r   rm   user_inputstorch.export._unliftr)  tuplerp   r   buffers_to_mutateuser_inputs_to_mutateoutput_tokens	enumerater   r-   pytreeLeafSpec)r  r   graph_signaturer%  r&  
state_dictr   parambufferplaceholder_nodeslifted_inputsr   	node_nameparameter_namebuffer_namer)  outputsmutated_outputsbuffer_mutationsuser_input_mutationsr4  r   outr   unlifted_gms                            r   _unlift_graphrG    ss    ?C$OQJ++U+C 
e 
4))		

 ))5)A 
f!
4&&		

 ++}+=)+M " 'II	<<<,AA)LN  0/;;;);;IFK  -&z+'>? GG4[AB  ; ;;;;  &' -).rxx/C/C/E/J/J1/M)NGO&88*@@#11Mg& 
&S6:%&-A)BBSEWWW"388,D''(.--,T2u%
& 
K r   Fc              #    K   t        d | j                  j                  d      D              }t               }| j                         D ]@  \  }}||v st	        |t
        j                  j                        s0|j                  |       B |rl| j                  j                  dt
        j                  j                  j                        D ]*  }|j                  |j                  d   j                         , |E d {    y 7 w)Nc              3  4   K   | ]  }|j                     y wr}   )r   r   s     r   r   z&_get_subgraph_names.<locals>.<genexpr>  s      55s   r  r   r   r   r   )rR   r   r   named_childrenr   r   r   rM   r  r   r   r   discardr   r   )r   skip_invoke_subgraphall_subgraph_namesfx_subgraph_names
child_namechild_moduler   s          r   _get_subgraph_namesrQ    s      +5 5((---<5 + *4$&$5$5$7 . 
L ++
%((..1
 !!*-. HH''uyy'='='M'M ( 
 	;D %%diil&9&9:	;
 !  s   AD$D9BD?D Dc                   t        ddd      5  t        j                  }t        j                  }t	        |       D ]'  }t        | |      }t        |d      }t        | ||       ) t        | |||      cd d d        S # 1 sw Y   y xY w)N_recursive_pre_grad_passesTpre_grad_pass_time_uslog_pt2_compile_eventdynamo_compile_column_usr   )	r&   r   add_pre_grad_passesremove_pre_grad_passesrQ  r   rS  r  rd   )r   example_inputs
add_passesremove_passessubgraph_namer   new_subgraphs          r   rS  rS    s     
$"!8
 N
 //
5504 	5Mr=1H5hCLB|4		5
 r>:}MN N Ns   A#A<<Bc                    t        ddd      5  t        | |      D ]  }t        | |      }t        ||        t	        |        d d d        y # 1 sw Y   y xY w)N_recursive_joint_graph_passesTjoint_graph_pass_time_usrU  )r&   rQ  r   r`  ra   )r   rL  r]  r   s       r   r`  r`    sf     
'"!;
  15IJ 	JMr=1H)(4HI	J 	2     5AAc                    t        ddd      5  t        |       D ]  }t        | |      }t        ||        t	        | |       d d d        y # 1 sw Y   y xY w)N_recursive_post_grad_passesTpost_grad_pass_time_usrU  )r&   rQ  r   rd  rb   )r   is_inferencer]  r   s       r   rd  rd    sb    	%"!9
 +
 14 	@Mr=1H',?	@ 	\*+ + +rb  Tc                f   ddl m}m}m}m}m}  || |||      }	| |	       nd}
t        t        |	j                  j                        d   j                  d         D ci c]  \  }}|j                  | }}}g }g }i }| j                  j                  D ]V  }|j                  |v r|j                  |       #|j                  |   |k(  s6|j                  dk7  sF|j                  |       X |D ]B  }d|j                  z   } || |||
||j                        nd|       ||j                     ||<   D |ddd   D ]X  }|j                  r/|j                  D ]  }|j                  |   |k(  rJ d| d        >| j                  j!                  |       Z | j#                          |	|fS c c}}w )	a  
    This function takes an GraphModule input "gm".
    The gm will be split into 2 components,
      1) const_gm, which consists the subgraph of gm that can be constant folded.
      2) gm (being inplace modified,) which returns the graph after constant folding.

    If an additional "lifted_constants" argument is passed in, we will assume the gm has
    been lifted and run the transformation accordingly.

    When a "skip_folding_node_fn" callback is passed, we will skip constant folding on
    the nodes for which the callback returns True.

    const_output_index is a mapping of corresponding node name from gm to the
    output index of const_gm.
    Returns (const_gm, const_output_index)
    r   )CONST_MODULE_TAGMETA_TAG
MODULE_TAGreplace_node_with_constantrun_and_get_constant_graphNr  r(  _FOLDED_CONST_znode: z user not empty.) torch._inductor.constant_foldingrh  ri  rj  rk  rl  r5  r1  r   r  r   r   r   r   r   users
erase_node	recompile)r   skip_constructorlifted_constant_namesskip_folding_node_fnrh  ri  rj  rk  rl  const_gmconst_resultr   r   const_outputsto_erase_nodeto_replace_nodeconst_output_indexr   new_const_namens                       r   split_const_gmr}    s   ,  *
35IH "7!>8:DL #,E(..2F2F,G,K,P,PQR,S"TQM  MO '99%""4(YYx $44M9Q  &	'   F)DII5" )0 ]49956		
 .;499-E>*F dd# &::ZZ Wvvh':5VvEU7VV5W HH%& LLN'''Es    F-c                Z   t         j                  j                  }t        |j                  j
                  |j                  j
                  |j                  j
                  |j                  j
                  g      }|D ]  }| j                  j                  d|      D ]  }t        |j                  j                  dd       t         j                        s8|j                  d   j                  t         j                   k(  sc|j                  d   j"                  j$                  dk(  s  y  y)Nr   r   r   r   TF)r   r   atenrR   mmdefaultaddmmbmmbaddbmmr   r   r   r   r   r   r  float32devicetype)r   r  tf32_opsr   r   s        r   is_tf32_warning_applicabler  f  s    99>>DGGOOJJHHLL  		
H  HH''?6'J 	D499==5u||DIIe$**emm;IIe$++00F:	 r   c                r   t        d | D              }t        j                  r=t        j                  r-|s+t        j                  d       t        j                  d      S t        j                  j                  r+t        j                  d       t        j                  d      S t        j                         S )z
    For CPU backend, enable comprehensive padding causes some unit tests
    fail due to changing number of generated kernels. Skip for now.
    c              3     K   | ]>  }t        |t        j                        st        |j                  j
                         @ y wr}   )r   r   r   rF   r  r  )r   ts     r   r   z6maybe_disable_comprehensive_padding.<locals>.<genexpr>  s/      "#Au||9Tqxx}}s
   A$Az!Skip comprehensive padding on CPUF)comprehensive_paddingz;Skip comprehensive padding for use_runtime_constant_folding)anyr   disable_padding_cpur  perf_hint_loginfopatchaot_inductoruse_runtime_constant_folding
contextlibnullcontext)rZ  has_gpus     r   #maybe_disable_comprehensive_paddingr  {  s      '5 G !!f&B&B7>?||%88				9	9I	
 ||%88%%''r   c                ^    | s|rt        j                  d      S t        j                         S )zH
    graph partition does not support cpp_wrapper and aot_mode yet.
    F)graph_partition)r   r  r  r  )cpp_wrapperaot_modes     r   maybe_disable_graph_partitionr    s'     h||E22%%''r   c                   t               5  t        |      }|s;t        j                  j	                  d      } t        | |      j                  |  n\|st        j                         n t        j                  j                  |dd      }|5   t        | |      j                  |  ddd       ddd       |S # 1 sw Y   xY w# 1 sw Y   S xY w)z}
    If we can not detect fake mode from the context of inputs, create one.

    The created fake mode will be returned.
    Tallow_non_fake_inputs)r   r  N)r   r%   r   _subclassesFakeTensorModerP   	propagater  r  r   r  objectpropagate_dont_convert_inputs)r   rZ  force_allow_non_fake_inputs	fake_modectxs        r   fake_tensor_propr    s     
"	# $^4	))88t8TI8N2I.88.I 3 &&(ZZ&&y2I4P 
  Pr	2PP#     s$   BCB:(C:C	?CCc                    t        j                  |       5  t        j                         cd d d        S # 1 sw Y   y xY wr}   )r   r  get_config_copy)config_patchess    r   get_patched_config_dictr    s1     
n	% (%%'( ( (s   4=c               #     K   t         j                  r#t        t               d      5  d  d d d        y d  y # 1 sw Y   y xY ww)NF)dirdelete)r   force_disable_cachesrC   r@   r   r   r   with_fresh_cache_if_configr    s>     "" Y[7 		 	 		 	s   &A;AA Ac                  |    e Zd ZU ded<   ded<   ded<   ded<   ded	<   ded
<   ded<   ded<   ded<   ded<   ded<   y)_CompileFxKwargszOptional[BoxedBool]
cudagraphsSequence[int]static_input_idxsr   is_backwardzOptional[int]graph_idr  r  rf  zOptional[bool]
layout_optz1Optional[Callable[[list[ExternKernelNode]], Any]]extern_node_serializerzOptional[BoxedDeviceIndex]boxed_forward_device_index
fx_wrapperNr   r   r   r   r  r    sC    ##$$NMM ::r   r  )totalc                  $    e Zd Z	 	 	 	 	 	 	 	 ddZy)_CompileFxCallablec                     y r}   r   )selfr   rZ  r   s       r   __call__z_CompileFxCallable.__call__  s    
 r   Nr   rM   rZ  Sequence[InputType]r   Unpack[_CompileFxKwargs]r  r?   )r   r   r   r  r   r   r   r  r    s-     , +	
 
r   r  c                   |j                  dd        |j                  dd       |j                  dd       |j                  dd        |j                  dd       |j                  dd       |j                  d	d       |j                  d
d        |j                  dd        |j                  dd        t        j                         5 }|j                  t        j
                  j                  j                                |j                  t        t        j                               |j                  t        j                  dddddd             |j                  t                      |j                  t                      t        j                   d|d           t#        t$        d      | |fi |cd d d        S # 1 sw Y   y xY w)Nr  r  r   r  Fr  r  r  rf  r  r  r  compile_fx_innerinductor_compileTcompile_inductor#inductor_cumulative_compile_time_us)
phase_namerV  log_waitcounterwaitcounter_name_overriderW  )r  inductor)compiler_name)
setdefaultr  	ExitStackenter_contextr   r   _python_dispatch_disable_current_modesrW   dynamo_configuse_lazy_graph_moduler~   r&   r  r^   r#   pt2_compiler!   _compile_fx_inner)r   rZ  r   stacks       r   r  r    s   
 lD)
)2.
mU+
j$'
mU+
lE*
ne,
2D9
lD)
.5 
			 
5EKK88OOQR2=3V3VWX%%"-&* $*<)N		
 	689LN+&&}-	
 P"#4JO
 
'
 
 
s   	C.GG
zcompilation time (in seconds)r   c                  '( t         j                  }t        j                  j                  j
                  j                          t        j                  | j                        dk(  r|s~ddl
m} ddlm} |j                  |        t        j                  j                   j#                         }t%        j&                  dd|i|j(                         t+        | j,                        S |j/                  dd      }t0        j3                  d	|       t5        ||      }t7        t9        t;        t=        | j                  j>                                    j@                  d   tB        tD        f      sJ d
| j                          |jG                  d      &tI        tJ        jL                  jN                        |d<   tJ        jP                  rtS        | |fi | tU        jT                         }	tW               }
tY                t[        d d t]        |       D        D              }t_        ddd      5  tJ        j`                   xrD tJ        jb                  xs |
xr. | xr) |xr% t        jd                  jJ                  jf                   }tJ        jb                  }|
}ti        d|       tj        j3                  d||||tJ        j`                         tm        |      D ]L  \  }}t7        |t        jn                        s!tq        |jr                  jt                        sA||v sFd|_;        N d}d}d'd}ty        |       }tU        jz                         }|rt}        j~                  | ||||      \  }'|v|\  }}tj        j3                  d|       |r)t}        j                         }tj        j3                  d       t}        j                  ||||||jG                  dd      |      \  }'ntj        j3                  d       t        jd                  jJ                  jf                  rn|J 'J t        j                          	 t        | ||fi |}|J t        j                         \  }}|j                  |       	 t        j                          n''d   dk(  r>|J tj        j3                  d''jG                  dd      nd       t        | ||fi |}nB'd   d k(  r|J |J tj        j3                  d!       t        j                          	 t        | ||fi |}|J tU        jz                         |z
  |_O        |\  }}||_P        ||_Q        t        j                         \  }}|j                  |       	 t        j                          |t        |      'd"<   |j                  'd#<   tj        j3                  d$|       t}        j                  |||||       n;'d   d%k(  sJ |J |J |\  }}tj        j3                  d&|       ||_P        ||_Q        |J |}''d   nd'(t%        j                  d(( 'xs i |)       t%        j                  d*(|'r'jG                  d+      nd'r'jG                  d,      nd'r'jG                  d      nd-||.       't        d/(fd0'fd12       |j                  |||       ddd       tj        j3                  d3tU        jT                         |	z
         tj        j                  t        j                        rg }t        d4   j                         D ]  \  }}|j                  d5      }t        |      d6k  r|j                  |d7d8d8d8|g       =t        |      d9k\  rd5j                  |dd:       nd5j                  |dd;       } | j                  d<      }!|!rDt        |      d9k\  r6|d:d \  }"}#}$}%d5j                  |dd:       } |j                  | |"|#|$|%|g       |d;d \  }#}$}%d5j                  |dd;       } |j                  | d7|#|$|%|g        tj        j                  d=       tj        j                  d>j                  d?d@dAdBdCdD             tj        j                  dE       |D ]9  }&tj        j                   d>j                  |&        tj        j                  dE       ; t        j                  j                  j
                  j                           t               t        j                  dF|d   rdGndH dI|dJ           S # t        t        f$ r  t        $ r3}t        |t                     j                  |j                        dd}~ww xY w# t        j                          w xY w# t        t        f$ r  t        $ r3}t        |t                     j                  |j                        dd}~ww xY w# t        j                          w xY w# 1 sw Y   4xY w)Kz
    Inductor API that compiles a single graph.

    If you change the argument list for this function, make sure you
    also update the call to save_args_for_compile_fx_inner below accordingly.
    r   )CompileEventLogLevel)_LazyGraphModulezbackward no-op
compile_id)metadata	log_levelr  r   z&static input idxs compile_fx_inner: %szGinductor can only compile FX graphs which return a tuple/list, but got r  Nc              3  8   K   | ]  }||j                     y wr}   )supports_caching)r   backends     r   r   z$_compile_fx_inner.<locals>.<genexpr>N  s&      	#  	  	#s   c              3     K   | ]7  }t        |j                  t        j                  t        j                         9 y wr}   )r\   r  r   r  r  r   r  s     r   r   z$_compile_fx_inner.<locals>.<genexpr>P  s6      
  +V//1B1B
s   =?fx_codegen_and_compileT)rV  r  fx_cachezXFX cache status: use_cache=%s, local=%s, remote=%s, aot_mode=%s, force_disable_caches=%szFX cache key generated: %szUsing remote FX cacher  F)r  	constantszFailed to generate FX cache keycache_statebypasszFX cache bypass reason: %scache_bypass_reasonunknownz*FX cache disabled or key generation failedmissz,FX cache miss, compiling and saving to cachetriton_bundler_metatime_taken_nsz.Saving compiled graph to FX cache with key: %shitzFX cache hit with key: %sdisabledfx_graph_cache_)r  time_nsr  r  
componentszcache not enabled)r  cache_event_timer  r  r  remote_cache_enabledlocal_cache_enabledartifactc                     d  ddS )Nr  jsonr   encodingr   )r  s   r   <lambda>z#_compile_fx_inner.<locals>.<lambda>#  s    -k]; &% r   c                 .    t        j                         S r}   )r  dumps)
cache_infos   r   r  z#_compile_fx_inner.<locals>.<lambda>'  s    4::j#9 r   metadata_fn
payload_fnz%FX codegen and compilation took %.3fsaten_mm_info_   -?   )r  r  z$Overview info of inductor aten mms: z3{:<30} | {:<20} | {:<20} | {:<20} | {:<20} | {:<20}NameBMNKCountz----------------------------------------------------------------------------------------------------------------------------------ztorchinductor done compiling 	BACKWARDSFORWARDS graph r  )ers   aot_compilationr   	_inductorasync_compileCompiledTritonKernelscache_clearr~   count_callsr   torch._dynamo.utilsr  torch.fx._lazy_graph_moduler  force_recompiler   CompileContextcurrent_compile_idr#   log_instant_eventPT2_COMPILEr.   forwardr  static_inputs_logdebugget_input_idxs_to_checkr   nextiterreversedr  r   r1  r   r   rA   r   tritonr  	save_argsr9   timerH   r]   allrD   r&   r  fx_graph_cache
_functorchbundled_autograd_cacher*   r   r5  r   rF   r  r  _is_inductor_staticr<   r  r1   prepare_keyget_remote_cacheload_with_keyri   begin_compiler  collectset_triton_bundlerU   rV   	Exceptionr`   r	   with_traceback__traceback__end_compile_time_taken_ns_fx_graph_cache_key_fx_graph_cache_debug_linesr  _save_graphinstantr  rK   post_compileisEnabledForr   INFOr$   itemsr  r   r   r   endswithr  formatr   ))r   rZ  graph_kwargsr  r  r  r  r  inputs_to_checkstartfx_graph_remote_cachebackends_support_caching	use_cachelocalremoter  inputmb_compiled_graphkey_inforemote_cacher  
start_timer  debug_linestriton_bundler  e	cache_keycompiled_graphmm_table_datar   partsr   
is_batchedbatchmr|  krowr   r  s)                                          @@r   r  r    s	    &&H 
OO!!77CCE)Q.x 	=@((,]]11DDF
,,"J/*66	
 rzz**'3'>'>?RTV'WDFWX-n>OPOd4 89:??BUDMR 
QRTRZRZQ[\R %-%.v}}/G/G%H\"&	
 	
 IIKE<> " 	#
 *"-	
	# 	  
 d
 PM +++ C&&?*?CC )C $$++BBB 	 %%&
I.		f''	
 ".1 	1HAu5%,,/5<<,,-**,0)	1 37
226	 \\^
%1%=%=NL/6&"Xz
 ##+ [		6<#/#@#@#BLII560<0J0J"  , 0 0 F'1-!: 		;<""99$,,,%%%
 ''),$:%;G%! )444 "))+!'!33MB ))+
 :m#<#H$,,,II, "- NN#8)DE !7NO!7C!
 &&0$,,,'''IIDE''),$:%;G%! )44437<<>J3N!0)1&	;8A!5@K!= "))+!'!33MB ))+".478K4L
01*;*J*JJ'IIF	R$$! m,555$000''''/$YII19=4=1<G9 ,,,* *4)?J}%Z 	 	""k]+%2	
 	&&#')3
u%7Az~~l3t  45(!' %	
  ! : 	##NI|LaPMd II5tyy{U7JK %">288: 	BJCIIcNE5zA~$$c3S#u%EF ,/u:?388E#2J'sPR@TD'9:Jc%jAo!&rsq!Qxxcr
+$$dE1aE%BC  *1axxcr
+$$dCAq%%@A)	B, 	78AHHS#sG	

 	  	 CHHQJQQSVWXHHY	  
OO!!77CCELN'&}5;:
F Gj)*	, ] %i0  #A|~6EEOO
 ))+N %i0  #A|~6EEOO
 ))+SPM PMs   -C	g27g2g2Dg28:d$3Bg2A)f5Eg2$e/<.e**e//e22fg2g#.gggg//g22g<c                  $    e Zd ZU dZded<   ddZy)_FxCompileStatr   r	  codegen_and_compilec                     d| j                    S )Nzcodegen_and_compile: )ra  )r  s    r   __repr__z_FxCompileStat.__repr__b  s    &t'?'?&@AAr   N)r  r  )r   r   r   ra  r   rc  r   r   r   r`  r`  ^  s      Br   r`  c                  d    e Zd ZU dZ ee      Zded<   e	 	 	 	 	 	 	 	 	 	 dd       Z	e
dd       Zy)		FxCompileza
    An FxCompile represents a mechanism that can turn a GraphModule into an
    OutputCode.
    z%dict[type[FxCompile], _FxCompileStat]_compile_statsc                     y r}   r   )r  r   rZ  rG  rF  s        r   ra  zFxCompile.codegen_and_compiler  s     r   c                8    | j                   j                          y r}   )rf  clear)clss    r   _reset_statszFxCompile._reset_stats{  s      "r   N
r   rM   rZ  r  rG  r  rF  r  r  r?   r  None)r   r   r   __doc__r   r`  rf  r   r   ra  classmethodrk  r   r   r   re  re  f  sr     =H<WN9W
  , '	
 ' 
  # #r   re  c                  2    e Zd Ze	 	 	 	 	 	 	 	 	 	 dd       Zy)_InProcessFxCompilec                  34567 d|v r|d   J |d   }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }	|j                  d	d      }
t        j                  }|j                  d
d      }|j                  dd      }t        d      j	                         5  t        j                         5  t        j                  x},ddl	}t        j                  d|        |j                  |       t              r
t                t        d   j!                         }t#        j$                  t'        t#        j(                         d              t+               t,        j.                  d|rdnd d|        t1        j2                         }t4        j6                  j8                  j:                  j=                  ||dd       |j?                         7tA        dd 7fd       t        jB                  jE                  |       tG        |      }tI               tK        dd      5  t5        jL                         5  tO        |      }ddd       ddd       tQ               tA        dd fd       t        jR                        5  tU              }|5  tW        |       ddd       t        jB                  jY                  |       tZ        jC                  d t]        d!ddd"             j_                  dddd#      4tA        dd$ 4fd%       t        j`                  jb                  dk7  r~t4        jd                  jf                  ji                  jj                        }tm        t4        jn                  jB                  jp                  |      t4        jn                  jB                  _9        tu               }|jw                         rbt"        jx                  d&k  r!t{        t        d'   j}                               }nt        d'   j                         }t        j                  d|(       t        j                         r 	 t        d)t        t                     i*       ddd       t        jR                  |      5  t        |      5  t        |	|      5  d}d}d}d}|rt        j                  j                  rt        d, -      \  }}t        |g |||	||||d|
.      }t        j                  |      5  t        j                  g       5  |	sJ d/       |j                          |j                         \  }}ddd       ddd       t        ||||	||||||r|j                  nd|r|j                  nd|||
0      }t        j                         }|j                          t        j                  |      5  t        j                  g       5   |j                  |  g }|j                  t               6|j                  D ]  } t        | t              rq| j                         rat        t        | j                                     dk(  r<|j                  t        6fd1| j                         j                  D                     |j                  d        t        |       d}!tK        d2d      5  |j                  r_|j                  rS|j                  rJ |j                         d   j                  }"t        jB                  d3|"j_                  d4             n|j                  rOd5d6lhmi}# |j                  sJ d/       |j                         \  }$}%t        jB                  d7|$j                         |%j                  r t        jB                  d8|%j                         d}&t        j                  r5|j                  t        j                        }&t        jB                  d9|&       tK        d:d      5  |#j                  ||$j                  |%j                  |&|j                  g t        j                  |j                  j                  |r|j                  j                  ng z         ;      }"ddd       n)|j                         }'|'j                  }"t        |'d<d      }!ddd       d5d3t        j`                  jb                  dk7  rt        j                  t4        jn                  jB                  j                               5t        j                  t4        jn                  jB                  j                        3tA        dd= 5fd>       tA        dd? 3fd@       d}(t        j                  t,        j.                        ry|j                         \  })}*}(t        xj                  |)z  c_|        t        xj                  |(z  c_}        t        xj                  |*z  c_~        t        j                  dA|)|*|(dB       t        j                   r>|j                         \  }+}+}(t4        jn                  jB                  j                  |(       t4        jn                  jB                  j                  |j                  j                         |r;t        j
                  j                  rt        jj                  j                  st5        jn                  j                  j                  | rd},jj                  j                  D ]  }-|-j                  j                  dCd      }.|-j                  dDk(  sFt        |.t4        j                        r+t4        jn                  j                  j                  |.      sw|-j                  j                  dEd      x},s n dF}/|,r	|/ dG|, dH}/n|/ dH}/|/t        jj                  _        |rut        jj                  j                  sZt              }0|0rLdI|0j                   }/|0j                  j                  dEd      x},r|/ dG|, dH}/|/t        jj                  _        t        j                  rt        "t        t        t4        jd                  j                   f      sJ t#        |"             t%        |"      cddd       cddd       cddd       cddd       cddd       cddd       cddd       S |rUt        jj                  j                  s:ddJlm}1  |1t        jj                  j*                        t        jj                  _        | j,                  t#        |          xj.                  d5z  c_        t4        jn                  jB                  j0                  rt4        jn                  jB                  j2                  ot        t4        j4                  j6                  j9                               }2|j                  d      }|(|2t4        jn                  jB                  j2                  |<   t;        "||t        jj                  j                  |j=                         t        d   |z
  |||||74|!53      cddd       cddd       cddd       cddd       cddd       cddd       cddd       S # 1 sw Y   lxY w# 1 sw Y   qxY w# 1 sw Y   (xY w# t        $ r t        j                  d+       Y 
w xY w# 1 sw Y   
xY w# 1 sw Y   	xY w# 1 sw Y   	xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)KzS
        Generates the OutputCode from the GraphModule and example_inputs.
        r  Nr  r   r  Fr  r  r  rf  r  z/pytorch.wait_counter.actual_codegen_and_compiler   z3Sleeping for %s since sleep_sec_TESTING_ONLY is setr  i  ztorchinductor compiling r  r  r  )save_dirr  c                     dddS )Nfx_graph_runnablestringr  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    / (% r   c                      S r}   r   )runnable_graph_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    #5 r   r  additional_fake_tensor_propTrV  c                     dddS )Nbefore_post_grad_graphrw  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    4 (% r   c                 ,     j                  ddd      S NFTprint_outputinclude_strideinclude_deviceprint_readabler   s   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    2#4#4!&tD $5 $ r   rf  %szAFTER POST GRADr  r  colored)r  r  r  fast_sympy_printc                     dddS )Nafter_post_grad_graphrw  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s     7$,) r   c                      S r}   r   )inductor_post_grad_graph_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    'C r   )r  
   graph_break)	overwritenum_graph_breakspt2_configs)extra_loggingzfailed to log pt2_configsc                    | j                   dk(  xrc t        | j                  t              xrG | j                  j	                  d      xs* t        | j
                  j                  dd       t              S )Nr  r  r   )r   r   r   r  r   r   r   rJ   )r   s    r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>O  s_    $''Z:O ;&t{{C8; !KK223CD X)$))--t*DFVW	 r   )rt  )
rZ  	shape_envr  r  r  r  rf  r  is_const_graphr  z"AOT mode only supports C++ wrapper)rZ  r  r  r  r  r  rf  r  rz  const_wrapper_codeconst_kernel_codeconst_modulerG  r  c              3  @   K   | ]  }j                  |        y wr}   )doprint)r   sps     r   r   z:_InProcessFxCompile.codegen_and_compile.<locals>.<genexpr>  s     )X1!))A,)Xs   zGraphLowering.compile_to_fnzOutput graph module: 
%s)r  rZ   )AotCodeCompilerzOutput wrapper code: 
%szOutput kernel code:
%sz#Serialized Extern Kernel Nodes: 
%szAotCodeCompiler.compile)device_typeadditional_filesrunnerc                     dddS )N*inductor_provenance_tracking_node_mappingsr  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    (T,21 r   c                      S r}   r   )r  s   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    /Y r   c                     dddS )N0inductor_provenance_tracking_kernel_stack_tracesr  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    (Z,21 r   c                      S r}   r   )inductor_kernel_stack_trace_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    /N r   zGraph Metrics:
%s)num_bytes_accessednodes_num_elemnode_runtimesr   r(  stack_tracezWgraph with symbolic shapes inputs and config.triton.cudagraph_skip_dynamic_graphs=True.z Found from 
z,disabling cudagraphs due to incompatible op ) check_lowering_disable_cudagraph)r   rs   r  rQ   guardr~   preserve_rng_stater   sleep_sec_TESTING_ONLYr+  r   warningsleepr  r   r$   copysyssetrecursionlimitr  getrecursionlimitr   r   rB  ioStringIOr   _dynamorepro	after_aotsave_graph_reprogetvaluerK   r$  fx_graphrr   rc   r&   no_gradr  r   set_fake_modeget_cuda_device_contextrd  fx_graph_transformedpost_grad_graphs_logr)   r  traceprovenance_tracking_levelr   	tracebackget_graph_provenance_jsonr   r8   r  _pre_grad_graph_id _inductor_post_to_pre_grad_nodesr(   in_progressversion_infosumvaluesr  r#   compilation_metric	is_fbcoder   r  r  r7  r  r  r  r  r}  re   set_graph_handlerset_extern_kernel_nodesruncodegen_with_cpp_wrapperr   r[   CachedMetricsHelperfreeze_runtime_assertsgraph_outputsrO   r   rg   has_tensor_outputr   rN   
get_strider   r1  
get_layoutr   _check_triton_bf16_supportr  r  r  codegenr   r2   	codecacher  extern_kernel_nodesr  compiler  dictfromkeyswrapper_coder  compile_to_modulecallr   r  r  dump_inductor_provenance_info_inductor_kernel_stack_traceinductor_metrics_logrA  count_bytesr  r  r  r  log_tlparselog_runtime_and_tensor_metalog_collective_schedule	schedulerr  r)  cudagraph_skip_dynamic_graphsdisable_cudagraphs_reasonr   any_is_symbolicr   r   r   rn   r   r   rM   r  r:   torch._inductor.cudagraph_utilsr  device_node_mappingrf  ra  RECORD_GRAPH_EXECUTIONGRAPH_COMPILE_IDSr   r  r  r;   
get_deltas)8r  r   rZ  rG  rF  r  r  r  r  r  r  r  rf  r  	sleep_secr+  inductor_countersfdr  r  cuda_contextprovenance_tracking_jsonmetrics_contextr  rz  const_graphr  r  ru  r   metrics_helperr   rE  compiled_fn_runnercompiled_fnr  r  kernel_codeserialized_extern_kernel_nodescompiled_moduler  	num_bytesr  r  r  r   meta_valdisablemaybe_incompat_noder  r  r  r  r  r  ry  s8    `                                                 @@@@@r   ra  z'_InProcessFxCompile.codegen_and_compile  sY     |+\0J0VVV ,\ :
+7+;+;<OQS+T(,,]EB"."2"2:t"D(,,]EB'++L%@
**)--neD5t< 	
 JKQQSQ	++-Q	 $:::	GI9 

9%)"-#% ( 4 9 9 ; !!#c&;&;&=t"DELN*"-;:> ?!
$ BMM))::B
T ;  "$ 6 GGR0
 .n=I$ B-T E ]]_ E 0^ DIEE 6b9
 	 + CA6r:! O/NO,,R@$***)'+'+ $	 02/@/@!&#'#'%)	 0A 0, !!  D <<99Q>**DDRXXN - ;!OO11DD4 OO))J #6"7"..0'''1+.x/F/M/M/O+P(+3M+B+H+H+J(&99"&9I ##%	A, -s3J3L/M+wCAL 	*o3NCo .k8Do
 &*""%)"$(! 3 3 P P 4B.40H0 #0 ')"+!)$/!)/E%1$/'+#-#K ++K811"5  +P,PP{#)'@@B >*,=  & $2'% +%+A!- +'94F*00D 4E)//$!,$3)-0 ")!<!<!> ,,.((/ d1J1J21N dEII~.QSN**6 +,#(#6#6 <C *3 7$'$9$9$;$'(=cnn>N(O$PTU$U !/ 5 5$))X@P@W@W)X$X!" !/ 5 5d ;< /u5 *.&%5T > !>>e.>.>','8'88#8*/--/!*<*?*?K+11 ; + : : : N
 #^^B#(#4#4  D#4 9>8V8V8X5L++11 ;\=O=O  +00 / 5 5$={?P?P!" >B: 44$)$@$@AVAV$W !? !0 5 5$J$B!"
 ". 9QU" " />.E.E$)$0$6$6$/$5$5$B050A0A	6&)-,1,>,>,O,O 4? 1<0H0H0Y0Y57	-.**	6& /F /"	" ", /4.E.E.GO*9*>*>K18 /42.y>B BF>6:3||==BEIZZ!OO11OOQFB ;?**!OO11NN;7 )&) (Z )&) (O %)M+88FCHCTCTCV@	>=22i?2-->-...@.,1106?2@1> )).3.?.?.A+1m--II-X OO))AA%//BWBWX #"MMGG ! A A!OO11AA>R&*$&HHNN 
&D'+yy}}UD'AH $= 8'1(ELL'I','<'<'L'LX'V (.2iimmM4.PP{P %
& #|&)0	k]"&MG)0	nG<C9!!''*K*K.STV.W+.(TUhUoUoTp&qG.A.F.F.J.J -t/  {   .5I\+b*Q@GAGG=(()'#tUXX5I5I)J  -,-    ,K8kd d dWo o o oEQ	 Q	 Q	J "!''*K*K
 = ! ; ; 9 ''T
3GG1LG --DD!OO11CCO%(!MM88KKM&
 $0#3#3J#?#/ * "OO11CCHM +#&99&113 ,/@@"&)$'*4*B7#ed d dWo o o oEQ	 Q	 Q	VE EE E8O Oz % A $?@AACA CAR   N" "I> >9d d d d dWo o o o o o o o oEQ	 Q	 Q	 Q	 Q	 Q	s  > E6	{"{	+{"3:-|!;{/		F|!({<$~60~!	=A!~|;4-|.!|;)A7~ }76C%}"E} A:}2}K?}"C>}"
	}7	~	~!	%	~6.	7	 
F}"	}7	~'	~!	0	~69		 {{""{,'/{94|!<|	|!|	|!!|+&.|83|;;} ~}}}}""}+'}7.	~7~ <~	~!	~~!		~6!~*&~6-	6~?;	 	  )Nrl  )r   r   r   r   ra  r   r   r   rr  rr    sK    mm ,m '	m
 'm 
m mr   rr  c                   t         t        j                  k(  rt               }nIt         t        j                  k(  rddlm}  |       }n$t         t        j                  k(  rddlm	}  |       }t        r'ddlm} ddlm} t        |      sJ d        ||      }t        r=ddlm}	 ddlm} t        |      sJ d       t#               }
t               } |	|||
      }j%                  | |||      S )	NrZ   )_DebugSerdeFxCompile)_SubprocessFxCompile)_AsyncFxCompile)_OutOfProcessFxCompilez7async is only valid with an out-of-process compile mode)_ProgressiveFxCompilez=progressive is only valid with an out-of-process compile mode)fx_compile_moder   r   rr  r   compile_fx_extr	  r   compile_fx_subprocr
  fx_compile_asynccompile_fx_asyncr  r  r   fx_compile_progressiver  r   ra  )r   rZ  rG  rF  schemer	  r
  r  r  r  progression_configsfast_schemes               r   r  r  r  s     -...$&	M33	38%'	M44	4<%'5:&"89 	
E	
9 !(;:&"89 	
K	
9 78 *+&{F<OP%%b./<XXr   c                d   g }t        |       D ]  \  }}t        |t        j                        s!t	        |j
                  j                        sAt               5  ||v rt        |      r
	 ddd       et        |      s
	 ddd       z	 ddd       |j                  |        |S # 1 sw Y   xY w)z
    This function runs at compile time, and generates a list of indices for which we
    might need to do a copy to preserve alignment requirements.
    N)r5  r   r   r   rF   r  r  ro   rI   rG   r   )inputsr  ids_to_checkr  rN  s        r   r%  r%    s     Lf% 5%.ell''(02 	 %%*;E*B		 	
 /u5	 	
 6	 	A), 	 	s   B&3B&&B/	r   )r  placeholdersmutated_input_idxsc                    ddl m}	 t        j                  j                  rEt        j                  |	|||||||t        j                  j                  j                         	      nt        d d fd}
|
S )Nr   )cudagraphify_impl)device_indexstack_tracesr  rf  r  r  r  r  c                ~    't        j                         5   |       d d d         |       S # 1 sw Y   xY wr}   )r~   r  )
new_inputsr   cudagraphify_fnmodelr  s    r   r  zcudagraphify.<locals>.run  sH    002 T-eZARST:&&T Ts   3<)r!  r  r  r   )torch._inductor.cudagraph_treesr  r   r)  cudagraph_trees	functoolspartialr   r   r  r  )r#  r  r  r  r  rf  r  r  r  new_cudagraphify_implr  r   r"  s   ``         @@r   cudagraphifyr)    sr    
 }}$$#++!%%#%%1}}33FFH

 ,K' ' Jr   c                    t        j                  | j                         | j                         | j                  | j
                        S )z1
    Copy and input while preserving strides
    )r  r  )r   empty_stridedsizer   r  r  )r   s    r   static_inputr-    s/     qvvx177188TTr   c                V    t        | |      } t        ||      }| j                  |       y)z=Index into expanded dimensions of both dst and src then copy_N)r>   copy_)dstsrcexpanded_dimss      r   index_expanded_dims_and_copy_r3    s'     c=
1C
c=
1CIIcNr   c                *  	
 t        |      }t        t        |            t        ||       t	        |t
              sJ t        |      D cg c]  \  }}|vrt        |      ng  c}}t        |      D cg c]@  \  }}t	        |t        j                        s|n|vrt        |      n|j                         B c}}t        t        |            D ]8  \  }\  }}t	        |t        j                        s$|vs)t        |   ||       : t        j                  j                          t        j                  j!                         }|j#                  t        j                  j%                                t        j                  j'                  |      5   | t                     ddd       |j                          t        j                  j%                         j#                  |       t        j                  j                          t        j                  j)                         
t        j                  j+                  
|d      5   | t                    ddd       t	        t
        t,        f      sft.        j0                  rd
fd}n1t3        t5                    D cg c]	  }|vs| c}	d	
fd}t7        ||t                     S c c}}w c c}}w # 1 sw Y   ExY w# 1 sw Y   xY wc c}w )zQ
    Assumes inputs[static_input_idxs[i]] are always the same memory address
    Nthread_local)streamcapture_error_modec                   t              t        |       k(  sJ t        t        |             D ]u  \  }\  }}}t        |t        j
                        s%t        |t        j
                        sJ |v r$|j                         |j                         k(  rgJ t        |||       w | j                          j                          	S r}   )
r   r5  zipr   r   r   data_ptrr3  ri  replay)
r!  r   r0  r1  r2  r   inps_expanded_dimsr  static_inputsstatic_outputss
        r   r  zcudagraphify_impl.<locals>.run=  s    }%Z8882;M:/AB3 K..c3 "#u||4!#u||444++<<>S\\^;;;
 2#sMJK LLN!!r   c                    D ]8  }|   }| |   }t        |t        j                        sJ t        |   ||       : | j	                          j                          S r}   )r   r   r   r3  ri  r;  )	r!  r   r2  r1  copy_indicesr   r<  r=  r>  s	       r   r  zcudagraphify_impl.<locals>.runU  si    # V 23 7 o!#u||444-mC.@#}U	V
 LLN!!r   )r!  list[InputType]r   Callable[[list[InputType]], Any])r%  rR   rq   rl   r   r   r5  r=   r   r   r-  detachr9  r3  r   synchronizeStreamwait_streamcurrent_streamr6  	CUDAGraphr   r1  r   size_assertsr   r   rj   )r#  r  r  check_input_idxsr   r   r2  r6  r  r@  r   r<  r=  r>  s     `      @@@@@r   r  r    s    /v7HI)3#F,=>* 6#34fd###  'C !$+< <!"D  '	 C a.  ++ a		M $-S9K-L#M Paa&36G+G)-*<aOP
 
JJZZ F
uzz0023			6	" #d=!"#
	JJ++F3	JJ JJ  "E			%>		R 4tM234ntUm4(*	" 	"* !]!34
CT8TC
	" 	" (-=z|LLY	*# #4 46
s1   K+AK1"K7L;	LL7LLc                <   t        | t              sJ |        t        |        t        j                  |xs i       }|j                  dd      st        j                  sd|d<   |j                  dt        j                  j                        }|r|j                  d      r"J d       i |dt        | j                        i}dd	lm}  ||      }|j                  d
d       }| j                   j                  dd       }t"        j$                  j'                  |      }t)        j*                  d      5  t"        j$                  j-                  |      5  t/        ddd      5  t1               5  t3        | |t5        j6                  ||      |      }	t        |	t8              sJ |	j:                  cd d d        cd d d        cd d d        cd d d        S # 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)Nr  FTr  zaot_inductor.output_pathz.pt2a
  The output path for aot_compile should not have an extension with .pt2 this is for specifying the output path for the .so in AOTInductor. If you would like to package the AOTInductor generated files into a pt2, please call `torch._inductor.aoti_compile_and_package`.rZ   )maybe_aoti_standalone_configr  dynamo_compile_idcompile_fx_aot)rV  reset_event_log_on_exit)r  )inner_compiler  )r   rM   r+   r  deepcopyr   r   r  r  output_pathrD  r0   coder   rL  r   r   r   r   r  rs   set_aot_compilationcompile_contextr"   r(   
compile_fxr&  r'  r:   filename)
model_example_inputs_rP  r  rR  rL  r  saved_compile_idsaved_compile_contextcompiled_artifactss
             r   rN  rN  b  s    fk*2F2* &f-%)]]>3GR%HN|U3v7H7H(,}% $$"F$7$7$C$CK ''/ 	
R	
/

&	&++(>

 41.AN+//0H$O{{':DA!MM889IJ	d#+%%&;<+ 	"&$(	
+ 	+ (#++'= *
 ,l;;;!**-+ + + + + + + + + + + + + + +sa   2 HG=!G(,AG	.	G(7	G= 	HGG(	G=(G1-G=4	H=H	HHc                v   ddl m}m}	 t        |        t	        j
                  | d      }
|
rt        | |d        ||         |	|| |      \  }D cg c]  }||   	 }}t        |      }|j                  j                  ^ }}|j                  d   }t        |      D cg c],  \  }}t        |t        j                  j                        s+|. c}}|j                   d<   g }t        j"                  j$                  j'                         }dgd|,|j(                  J |j(                  }t+        dt-        |      dz
        t/        t0                  }|j2                  }|J d}t-        |      dkD  rg t5        t-        |            D ]I  }|vrd ||<   |dkD  r(||   ||dz
     k(  r|dz  }n|j7                  ||          j9                  |       K |j:                  J t5        t-        |j:                              D ]  }||vsd |j:                  |<    |j<                  r|j<                  j>                  }t@        jB                  jE                  |dd      5   ||||||d||
      d d d        tF        jH                  rS d
fd	}d|_%        |S c c}w c c}}w # 1 sw Y   8xY w)Nr   )%convert_conv_weights_to_channels_lastfreezeTr  r   rZ   r  )r  r  r  rf  r  r  c           
         D cg c]  }| |t        |         z
      }}| j                           |      S c c}w r}   )minri  )r   r  args_newmax_offset_idxoptimized_functionpreserved_arg_indicesunwrapped_args_offsetss      r   wrapperz%fw_compiler_freezing.<locals>.wrapper   sT     +
 +C>,BCCD
 
 	

!(++
s   <)r   zlist[object]r  zSequence[torch.Tensor])&torch._inductor.freezingr^  r_  r`  re   decide_layout_optr  r%   r   r  r   r5  r   r   r   r   r   r   r   r   params_flat_unwrap_subclassesr  r   rR   r	  params_unwrapped_to_flat_indexr   r  r   params_flatr   r   r   r  r  rs   r  _boxed_call)aot_autograd_modelaot_example_inputsdynamo_modelnum_example_inputsrP  r  r  forward_devicer^  r_  r  	opt_modelindr  r  model_outputs_nodemodel_outputsr   r|  r  tracing_contextparams_flat_unwrappreserved_indices_params_flatunwrapped_idxscurrent_offsetr  rg  rc  rd  re  rf  s                              @@@@r   fw_compiler_freezingr|    s    W ""45001CRVWJ+-?F-.@A'-($I$ >SSc,S1SS !34I '__22Q&++A.M#M2;QjEHHMM6R;67 $&mm22::<OSN"<<HHH,JJQ$6 7! ;<(23(9%(GG)))!"Q&%'"s-./ 	:A--(,"1%q5^A..Q2GG"a'N-11.2CD")).9	: **666s?6678 	6A5515++A.	6 && / ; ; P P			9&=t	D 

*/!'5!	


 	!!, , GNQ T;L

 

s   J$&,J)J)'J//J8c                     t         j                  j                  rt        t	        d             t         j                  j
                  t         j                  j
                  n	t               ddddS )Nzcpp wrapper enabledFT)ztriton.autotune_at_compile_timeztriton.autotune_cublasLtztriton.cudagraphsztriton.store_cubin)r   r)  r  r5   r4   autotune_at_compile_timerY   r   r   r   get_cpp_wrapper_configr    sY    }}+'(=>	
 }}55A MM22$)""
 
r   c                B   t         j                  j                         st        j                         S t        d t        |       D              }t        |      dk(  r1t         j                  j                  t        t        |                  S t        j                         S )zX
    Returns a cuda device context manager if there is a single device in the graph
    c              3  @   K   | ]  }|j                   d k(  s|  yw)r   N)r  r  s     r   r   z*get_cuda_device_context.<locals>.<genexpr>'  s       8FKK64I8s   rZ   )r   r   r   r  r  rR   rD   r   r  r&  r'  )r   cuda_devicess     r   r  r     s     ::""$%%''-7 8,R08 .L |! 	

$tL123 ##%r   c                0   t        |       }|5  t        | d       d d d        |j                  dd       }t        j                  1t        j                  dd      5  t        | |fd|d|cd d d        S t        t        j                  t              sJ t        j                  t        j                  j                  j                  d      5  t        j                  | |fd|d|cd d d        S # 1 sw Y   xY w# 1 sw Y   y xY w# 1 sw Y   y xY w)NT)rL  static_lifetime_input_indicesr   r{  r  )compilerr  )r  r`  r   r   custom_partitioner_fnr~   r&   r   r   r7   	__class__r   )r   joint_inputsr   r  r  s        r   partition_fnr  2  s5   
 +2.L	 E 	&btD	E :@':! ##+&&1
 		 7 $.K	
 		 		 &668KLLL&&((22;;"&
 
	 // $.K	
 	
	 
	/E E		 		
	 
	s#   C4D D4C= D	Dc                f    t        |       }t        j                  |j                   }t	        |      S r}   )rp   r6  arg_tree_leavesr   r   )r#  ru  rv  s      r   get_num_model_outputsr  \  s/    $U+**,>,C,CDM}r   )frozenc                  ,    e Zd ZU ded<   ded<   ded<   y)CompilerConfigExtrarA   r  r	  r  r3   rr  Nr   r   r   r   r  r  b  s    M$$r   r  c                    t        | j                  j                        }t        t              }t        d       }t        |||      S )N)r  r  rr  )rA   r)  r  r&  _graph_counterr3   r  )r   r  r  rr  s       r   create_compiler_config_extrar  i  sF    
 6==334J N#H &d+N% r   c           	     `    |r/t        dd  fd       t                t        dd  fd       t        j                  j                  j                  |t        |            }t               }t        j                  rt        j                  |j                   }	t        |	      }
t        j                  j                  j                         }|%|j                   r|s|j                   j"                  }nd}||
k  sJ ||z   }||
k  sJ t%        ||      D cg c]+  }t'        |	|   t        j(                  j*                        r|- c}|j,                  d<   ng |j,                  d<   t/                 | |t1        |      |j2                  |j4                  ||j6                  	      S c c}w )
a#  
    Compile the forward graph of the given graph module.

    Args:
        gm: The graph module to compile.
        example_inputs: The example inputs to use for compilation.
        num_orig_model_outputs: The number of model outputs from the original dynamo graph.
        num_example_inputs: The number of example inputs from the original dynamo graph.
        compiler_config_extra: Extra configuration for the compiler.
        inner_compile: The inner compile function to use.
        is_inference: Whether this is an inference graph.
    r  c                     dddS )Nbefore_joint_graphrw  r  r   r   r   r   r  z$compile_fx_forward.<locals>.<lambda>  s    ,$! r   c                 ,     j                  ddd      S r  r  r  s   r   r  z$compile_fx_forward.<locals>.<lambda>      r00"4  1   r   r  c                     dddS )Nafter_joint_graphrw  r  r   r   r   r   r  z$compile_fx_forward.<locals>.<lambda>  s    +$! r   c                 ,     j                  ddd      S r  r  r  s   r   r  z$compile_fx_forward.<locals>.<lambda>  r  r   r   r   )r  r  r  rf  r  )rK   r`  r   r  r   num_fw_fixed_argumentsr   rp   r   keep_output_strider6  r  r   r   r   r   r   num_mutated_inp_runtime_indicesr   r   r   r   r   r   r   r  r  rr  )r   rZ  num_orig_model_outputsrq  compiler_config_extrarP  rf  r   ru  rv  num_model_outputsr   original_output_start_indexorig_output_end_idxr   s   `              r   compile_fx_forwardr    s   , 		
 	&b)		
 OO!!88C/E %R  ..0B0G0GH.--..6687#6#6|##CC ( +,'%):::: :<RR #&7777 8:MN?
-,ehhmm< ?
 :; ?A :;
 /r2
/6(33&//!#8#G#G ?
s   0F+c                   ddl m} |5  t        |       }t        j                  rlt        j                  |j                   }t        |      D cg c]+  \  }}t        |t        j                  j                        r|- c}}|j                  d<   ng |j                  d<   t        |       }	t        j                  rt        j                   t#                     nt%        j&                         5   || |t)        t+        |	            |j,                  d|j.                  |j0                        cddd       cddd       S c c}}w # 1 sw Y   nxY w	 ddd       y# 1 sw Y   yxY w)a5  
    Compile the backward graph of the given graph module.

    Args:
        gm: The graph module to compile.
        example_inputs: The example inputs to use for compilation.
        compiler_config_extra: Extra configuration for the compiler.
        inner_compile: The inner compile function to use.
    r   )compile_lockr   T)r  r  r  r  r  N)torch._dynamo.convert_framer  rp   r   bw_outputs_user_visibler6  r  r   r5  r   r   r   r   r   rB   r  r  r  r  r  r   r   r  r  rr  )
r   rZ  r  rP  r  ru  rv  r   r|  r   s
             r   compile_fx_backwardr    s?    9	 (_))"224F4K4KLM (6CCa/ C##$>? CE##$>?r" !! LL/12'')	
 !"&uU|"40;; .77+@+O+O	 	 C	 	 	  s7   AE0D>?A-E,?E+	E>EE		EE#c           
         t        dd  fd       t        j                  dt        d ddd             t	         j
                        t        j                  j                  _        t        j                  j                  d	k(  rc j
                  j                  D ]J  }|j                  s|j                  t        j                  j                  j                  |j                  <   L t!         |       t        dd
  fd        S )Nr  c                     dddS )Nbefore_pre_grad_graphrw  r  r   r   r   r   r  z%run_pre_grad_passes.<locals>.<lambda>#	  s    + 
 r   c                 ^     j                  ddd      dt         j                         z   S NFTr  z

 # graph id: r  idr   rX  s   r   r  z%run_pre_grad_passes.<locals>.<lambda>'	  9    600tD 1 
 b./
01 r   r  r  zBEFORE PRE GRADTr  rZ   c                     dddS )Nafter_pre_grad_graphrw  r  r   r   r   r   r  z%run_pre_grad_passes.<locals>.<lambda>B	  s    * 
 r   c                 ^     j                  ddd      dt         j                         z   S r  r  r  s   r   r  z%run_pre_grad_passes.<locals>.<lambda>F	  r  r   )rK   pre_grad_graphs_logr$  r)   r  r   r   r  r  r   r  r  r  r  #_inductor_pre_grad_node_stack_tracer   rS  )rX  rY  r   s   `  r   run_pre_grad_passesr  	  s    
 
1
 	
	 02&,,/?EOO,||--2LL&& 	D$$ %%II$))T	 (@F
1
 Mr   c                   #$ t        d |D              r2t        j                  j                  j                  j                          |rHt        j                  |      5  t         | t        j                  |            ||      cddd       S t        j                  st        j                  rt        j                  }t        j                  }t        j                  dddt                     5  t        j                  |      5  |}t         t              r	 j                   j"                  D 	cg c],  }	|	j$                  dk(  r|	j&                  j)                  d      . }
}	|
D cg c]   }t        |t        j*                        r|nd" }
}t        d |
D              rt-        t/               |
|      D ]g  \  }}}|
t        |t        j*                        sJ |j0                  |j0                  k7  s@t3        d	| d
|j0                   d|j0                   d       |
}ddlm} t9        |      } | |i |      5 \  }}}}}t        ||t;        j<                  ||      ||      cddd       cddd       cddd       S t;        j<                  t        ||      }t?               stA         ||      S t         t              r1t         j                   jB                  tD              rtG         ||      S tI        tJ        jL                        5  tO               5  t        jP                  jR                  jU                  t        jV                  jX                  dk(        5  t        j                  jZ                  j]                         5  t         t              rt_         |       t        d |D              r1ta         ||      cddd       cddd       cddd       cddd       S t        jb                  rJ te        |      $tg        t              #||n	ti               }	 	 	 	 	 	 	 	 d"# $fd}t;        j<                  |d      }tk        tl        |      }t        jn                  rSt        jp                         s?t;        j<                  tr         $#jt                  #jv                  #jx                        }n't;        j<                  |d      }tk        tl        |      }t{        d      	 	 	 	 	 	 d##fd       }tk        tl        |      }t9        |      xs  t        j|                  j                  d      }t        j                  j                  j                         xs t        j                  j                  |      }t        j                  ryddlDmE}  |        t        j                  d      5  t         |d|      \  }}ddlHmI}  ||      }|j                   j"                  D ]  }	|	j$                  dk(  sd|	j&                  vs" t        |	j                        |      }t        |t        j*                        r%|J |j                  |d      |	j&                  d<   |t        |t        j                        r8t        j                  j                  j                  ||      |	j&                  d<   t        |t              s||	j&                  d<    	 ddd       t               }d j&                  v r j&                  d   |j&                  d<   d  j&                  v r j&                  d    |j&                  d <   t        j                  j                         } | rt        j                  j                  nt        j                  }!t        j                  |      5  t        j                         5   |!       5   |||      cddd       cddd       cddd       cddd       cddd       cddd       cddd       S t        j                  |      5  t        j                  j                  |      5  t        j                         5  t        j                  d      5  	  t        ||||t        d#jt                  #jx                  |!	       |      cddd       cddd       cddd       cddd       cddd       cddd       cddd       cddd       S # 1 sw Y   rxY wc c}	w c c}w # 1 sw Y   nxY w	 ddd       n# 1 sw Y   nxY wddd       # 1 sw Y   xY w# 1 sw Y   ^xY w# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       f# 1 sw Y   pxY w# t        $ r}"|"j                         dd}"~"ww xY w# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)$a@  
    Main entry point for compiling given FX graph.  Despite the fact that this
    lives in :mod:`torch._inductor`, this function is responsible for calling
    into AOT Autograd (and we will eventually get a callback to
    ``inner_compile`` to perform actual compilation.  In other words, this
    function orchestrates end-to-end compilation for the inductor backend when
    you use :func:`torch.compile`.

    NB: This function TAKES OWNERSHIP of the input ``model_`` and can potentially
    mutate it!  Make a copy if you need to preserve the original GraphModule.
    c              3     K   | ]8  }t        |t        j                        xr |j                  j                  d v  : yw))r   xpuN)r   r   r   r  r  )r   rU  s     r   r   zcompile_fx.<locals>.<genexpr>b	  s8       	1ell#H(HHs   >A )rP  decompositionsignore_shape_envNF)r  r  r(  r   c              3  $   K   | ]  }|d u 
 y wr}   r   )r   vs     r   r   zcompile_fx.<locals>.<genexpr>	  s     :q}:s   zBDevice mismatch between fake input and example input at position #z: z vs zx. If the model was exported via torch.export(), make sure torch.export() and torch.aot_compile() run on the same device.r   )_fakify_script_objectsrZ   c              3  R   K   | ]  }t        |t        t        t        f       ! y wr}   )r   r   r1  r  r   s     r   r   zcompile_fx.<locals>.<genexpr>	  s     Kaz!dE401Ks   %'c           
         t        j                  d      5  t        t              rt	              }nt	        |       }t        | |||      cd d d        S # 1 sw Y   y xY w)Nz$compile_fx.<locals>.fw_compiler_base)r  rq  r  rP  rf  )r~   r&   r   rM   r  r  )r   rZ  rf  r  r  rP  rX  rq  s       r   fw_compiler_basez$compile_fx.<locals>.fw_compiler_base	  sf    
 **+QR fk2-B6-J*-B2-F*)"+A'9*?"/!-  s   9AA#r  )rp  rq  rP  r  r  rr  Tbackward)r  c                x    t        j                  d      5  t        | |      cd d d        S # 1 sw Y   y xY w)Nzcompile_fx.<locals>.bw_compiler)r  rP  )r~   r&   r  )r   rZ  r  rP  s     r   bw_compilerzcompile_fx.<locals>.bw_compiler
  s?    
 ))*KL +"*?"/	  s   09r  )is_valid_aoti_model_name)unlift_effect_tokens)trace_jointr  )_detect_fake_mode_from_gmr  )static_shapes dynamo_flat_name_to_original_fqnrM  )	fw_compilerr  inference_compilerr  r  keep_inference_input_mutationsr  r  r  )r   rM   rZ  r  rf  r   r  r?   )r   rM   rZ  r  r  r?   )`r  r   r  r  AsyncCompilewakeupr   r  rV  r  r  r  rs   set_real_inputsr   rM   r   r  r   r   r   r   r9  r
   r  
ValueErrortorch._export.non_strict_utilsr  r%   r&  r'  graph_returns_tuplemake_graph_return_tuple_codegenrX   handle_dynamo_export_graphrW   r  r  r   r   r  preserve_node_metar  r  r$  reset_provenance_globalsr  r'   _raise_error_for_testingr   r  r_   r/   r?   freezingis_grad_enabledr|  r  r  rr  rL   r  r  r   r   r   r  r   r  functorch_configr,   torch._export.utilsr  r   r   from_tensorScriptObject_libraryfake_class_registrymaybe_to_fake_objrJ   rG  _C_is_any_autocast_enabled_DisableAutocastr  r  r  r   _disabletracingrT   r  rU   remove_dynamo_frames)%rX  rY  rP  r  r  r  cpp_wrapper_configfx_wrapper_configinputs_r   fake_inputsinpr   fir  r  r  patched_mod	fake_argsr  recursive_compile_fxr  r  r  r  rw  r  r   r8  r  r   rF  disable_ampr   rU  r  rq  s%   ` `                                @@r   rV  rV  N	  s	   (     	%%2299;
 \\.) 	:fll>:=I-!1	 	 V..#//"-- LL#("' -.9	 o.9	 ,;G&+. !' 2 2ww-/ IIMM%(   + &c5<<8CdB 
 :k::&)%';&H "
R>#-a#>>#>!yyAHH4&0&hilhmmo')yykahhZ @o%o'" !"	" *GM(1I'YG  L!"+"3"3%$6#4#
 $2%5
 Q9	 9	 9	v %,,#%)	 v&& 
 	
 &+&:~, * 
 	
 	}BBCw9 "w9 	--LL22a7	
w9 	668w9 fk*(AF
 K?KK'$)w9 w9 w9 w9 w94 2222 1 <V D -8N>Q>S 			/	 	 		 	* .UC 	 6j+N??5#8#8#:5>5F5F$##5+0;;.774CC6 "+!2!23CRV!W!@." 
'*	=		-@		 
>	 6j+N$
 J--D-I 	 MM((002 7}}++I6 	
 7$&!''TB  6&7# %#1	'#O J5b9	 HHNN 6Dww*,dii1G!8DKK!8!<%fell;#,#88#8/8/D/D &d 0E 0DIIe, (0B0BC % B B T T$-v!" !IIe,
 (0@A/5DIIe,6# 6D (ODK1V[[@GM{{6H  !CD #fkk18>DW8X  !45  ((;;=K-8))j>T>T  + H->-G-G-I H79 H)+GH H H Hw9 w9 w9 w9 w9F OOI&	9MM!!/2	9 &&(	9 ""=		99
| + +'9#1!-374??/D/S/S%5
 /
+	9 	9 	9 	9 	9Ew9 w9 w9 w9 w9I	 	8(  Q9	 9	 9	 9	 9	 9	~ 6  6jH H H H H H H H* $ 9 ,,.D89%	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9Ew9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9s"   )c;6d>,d(81d)d(/%d2d(5d(=Ad(,d1	d(:	d>i'Ah0,)h;h		h	h0"	i5Gh	A	ee B<ee/Ch	7fe-	e	e-&	f/	h	8	h	h0
	ih	2 g1g'g>f2 2f2	g;	g	g1	h		h	h0(	i;d
d(dd(	d>(d1	-d>>eeh	e!e-$	f-e62f9
h	fh		f/f**f//f22f;7g>	ggg	g1g%!g1(	h	1g:6h	=	hhh	h0h$ h0'	i0h9	5iic                   t        | t              syt        |       j                  \  }t        |t        t
        f      ryt        |t        j                  j                  j                        rst        |j                  d      r]t        |j                  j                  j                        dkD  r1t        d |j                  j                  j                  D              ryy)z"True if a FX graph returns a tupleT_schemarZ   c              3  L   K   | ]  }t        |j                        d k(    yw)r   N)r  r  )r   rets     r   r   z&graph_returns_tuple.<locals>.<genexpr>
  s     OcCHH)Os   "$F)r   rM   rp   r   r   r1  r   r   r   r   r  r   r   r  returnsr,  )r   rvs     r   r  r  
  s    b+&O  ER"tUm$2uxx}}))*BIIy)		!!))*Q.ORYY5F5F5N5NOO r   c                   t        |       }|j                  \  }t        j                  |      \  }| j                  j                  |      5  | j                  j                  |       ddd       | j                  j                  |       t        |       sJ  || |      t        j                        dfd       }|S # 1 sw Y   [xY w)z
    Mutate gm so it returns a tuple.  This is only needed for graphs
    not created by torchdynamo that return non-tuples.
    Nc                 <    t        j                   | i |      S r}   )r6  tree_unflatten)r   r   r   specs     r   rg  z(make_graph_return_tuple.<locals>.wrapper
  s     $$[$%A&%A4HHr   )r   r   r   r   r  r   )rp   r   r6  tree_flattenr   inserting_beforer   rp  r  r&  wraps)r   r  
compile_gmr   r  rg  r   r  s         @@r   r  r  
  s     r?DIIER""2&HB		"	"4	( 
HHr"""R(K__[!I "I N s   CCc                .   | j                   j                  t        j                  j                   j	                         | j                   _        | j                           ||  j                  |       t        j                        dfd       }|S )z
    `torch._dynamo.export` embeds pytrees in the FX graph codegen object,
    convert that to a normal FX graph so inductor can compile it.
    c                 F    j                    j                  |         S r}   )process_outputsprocess_inputs)r   r  r   s    r   rg  z+handle_dynamo_export_graph.<locals>.wrapper
  s'    &&{4JG4J4JD4Q'RSSr   )r   r   r  r   )	r   r  r   r   CodeGenrq  r  r&  r   )r   r  r  rg  r  r   s       @@r   r  r  
  sx     hhG..0BHHLLNR!7!7!7!@AK__[!T "T Nr   c                   dd}t        j                  | j                  j                         | j                        D ]  }t        |t              st        |      }|r,t        |      r!|j                         t        j                  k7  rNt        |      }|j                  d      r y  ||j                                 y )Nc                    ddl m} | J t        | j                        }|j	                  |       }t        j                  |j                   d        |d      )Nr   )rV   z9 does not support bfloat16 compilation natively, skippingzBF16 is not supported)torch._dynamo.excrV   r    r  get_device_propertiesr   r   r   )r  rV   device_interfacedevice_propss       r   warn_and_skipz1_check_triton_bf16_support.<locals>.warn_and_skip
  s\    /!!!3FKK@'==fE  !!Z[	
 /00r   F)including_emulation)r  zOptional[torch.device]r  r   )	itertoolschaingraph_inputsr  r  r   rg   rf   rF   	get_dtyper   bfloat16r    is_bf16_supported
get_device)r   r  r   r  r  s        r   r  r  
  s    
1  2 2 9 9 ;U=P=PQ )$'%d++&~~5>>1 4K@--%-Hdoo'()r   )optionsc               B   ddl m}  ||       sJ d       d}d}t        | j                  j                  t
        j                  j                  j                        r| j                  j                  }t
        j                  j                  j                         | j                  _        | j                          |j                  j                  |j                  j                  }|j                  j                  G|j                  j                  }n0t        | d      r| j                  }t        | d      r| j                  }|t!        j"                  |      nd}|t!        j"                  |      nd}	t!        j$                  ||xs i f      \  }
}t'        d |
D              rd	d
lm}m}  ||j.                  d      |
D cg c]&  }t        |d   t
        j0                        r|d   nd( }}|||k7  rt3        d| d|       |||	dni |||	d}||fS c c}w )z
    Flatten the inputs to the graph module and return the flat inputs and options.
    Add "aot_inductor.serialized_in_spec" and "aot_inductor.serialized_out_spec" to the options.
    rZ   )r  zGraph output must be a tuple(). This is so that we can avoid pytree processing of the outputs. Please change the module to have tuple outputs.N_in_spec	_out_spec c              3  V   K   | ]!  }t        |d    t        j                         # yw)rZ   N)r   r   r  r   s     r   r   z'_aoti_flatten_inputs.<locals>.<genexpr>  s!     
MA:adE../
Ms   ')r   )	UserErrorUserErrorTypezTorchBind objects found in inputs. TorchBind object inputs are not supported in AOTInductor. TorchBind objects can only be attributes.z>Trying to flatten user inputs with exported input tree spec: 
z-
but actually got inputs with tree spec of: 
)zaot_inductor.serialized_in_specz aot_inductor.serialized_out_spec)rV  r  r   r   r  r   r   rX   r  rq  pytree_infoin_specout_specr  r  r  r6  treespec_dumpstree_flatten_with_pathr  r	  r  r  INVALID_INPUTr   r  )r   r   r   r  r  r  r   r  serialized_in_specserialized_out_specflat_args_with_pathreceived_specr  r  r   flat_example_inputss                   r   _aoti_flatten_inputsr)  
  s(    0r" 	" GH"((##UXX^^%B%BC((##!HHNN224
&&2))11G''3**33H 2z"kkG2{#||H;B;N..w7TV+3+?h'R  *0)F)F	v|*& 
M9L
MM>''8
 	
 CV=>
1Q4.!D8  }7Mi <o
 	
 ? 0B0C	



/A0C
  ''1s   +H)r   r  r  z.Callable[[Callable[_P, _T]], Callable[_P, _T]])r   r  r   r  r  rn  )r  r   )r  zlist[dict[str, Any]])r   r	  r  	list[int])r   rM   r  rn  )r  zCallable[..., None]rm  )r  rM   r   rM   r  rn  )r  rM   r   rM   r8  r   r  rM   )F)r   rM   rL  r   r  zGenerator[str, None, None])r   rM   rZ  r  r  rM   )r   rM   rL  r   r  rn  )r   rM   rf  r   r  rn  )TNN)
r   rM   rr  r   rs  zOptional[list[str]]rt  z)Optional[Callable[[torch.fx.Node], bool]]r  z"tuple[GraphModule, dict[str, int]])r   rM   r  r   )rZ  r  r  "AbstractContextManager[None, None])r  r   r  r   r  r+  )r   rM   rZ  r  r  r   r  z torch._subclasses.FakeTensorModer}   )r  z$Optional[Union[str, dict[str, Any]]]r  zdict[str, Any])r  zGenerator[None, None, None]r  )r   rM   rZ  r  rF  r  r  r?   )
r   rM   rZ  r  rG  r  rF  r  r  r?   )r  r  r  r  r  r  )r   )r#  Callable[..., Any]r  r  r  r	  r  zlist[Optional[str]]r  r   rf  r   r  ztuple[torch.Tensor, ...]r  zSequence[PlaceholderInfo]r  ztuple[int, ...]r  r,  )r   torch.Tensorr  r-  )r0  r-  r1  r-  r2  r*  r  rn  )r#  r,  r  zlist[torch.Tensor]r  r  r  rB  )
rX  rM   rY  rA  rP  r  r  Optional[dict[str, Any]]r  z2Union[list[Union[str, Weights]], str, GraphModule])rn  rM   ro  r  rp  rM   rq  r	  rP  r,  r  rA   r  r	  rr  r3   r  z0Callable[[list[object]], Sequence[torch.Tensor]])r  zdict[str, object])r   torch.fx.GraphModuler  zAbstractContextManager[None])r   rM   r  zSequence[object]r   r  r  ztuple[GraphModule, GraphModule])r#  rM   r  r	  )r   ztypes.ModuleTyper  r  )r   rM   rZ  r  r  r	  rq  r	  r  r  rP  Callable[..., OutputCode]rf  r   r  r?   )
r   rM   rZ  r  r  r  rP  r0  r  r?   )rX  rM   rY  r  r  rM   )rX  rM   rY  r  rP  r0  r  r.  r  z.Optional[dict[OpOverload, Callable[..., Any]]]r  r   r  zPUnion[Callable[[list[object]], Sequence[torch.Tensor]], str, list[str], Weights])r   rM   r  r  r  r,  r  r,  )r   re   r  rn  )
r   r/  r   z!Union[list[Any], tuple[Any, ...]]r   r.  r  r.  r  z tuple[list[Any], dict[str, Any]](  
__future__r   r  r  enumr&  r  r  r  r   r   r  r+  r   abcr   r   collectionsr   r   dataclassesr   inspectr	   r
   operatorr   typingr   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   unittestr   torch._inductor.async_compiler   torch.fxtorch.utils._pytreer   _pytreer6  functorch.compiler   r   torch._dispatch.pythonr   torch._dynamor   r   r  r   r~   torch._dynamo.device_interfacer    torch._dynamo.repro.after_aotr!   r  r"   r#   r$   r%   r&   r'   r(   r)   r*   torch._functorchr  7torch._functorch._aot_autograd.subclass_parametrizationr+   torch._functorch.aot_autogradr,   r-   r.   r/   torch._inductor.codecacher0   r1   r2   r  r3   r4   r5   r6   !torch._inductor.custom_graph_passr7   torch._inductor.debugr8   r9   torch._inductor.output_coder:   r;   r<   r=   r>   r?   'torch._inductor.runtime.cache_dir_utilsr@   torch._inductor.utilsrA   rB   rC   rD   rE   rF   rG   rH   rI   "torch._library.fake_class_registryrJ   torch._loggingrK   torch._utils_internalrL   rM   %torch.fx.experimental.symbolic_shapesrN   rO    torch.fx.passes.fake_tensor_proprP   torch.monitorrQ   torch.utils._ordered_setrR   _dynamo.backends.commonrT   _dynamo.excrU   rV   fx._lazy_graph_modulerW   fx.graphrX   utils._tritonrY   r  r[   codegen.commonr\   r]   r$  r^   decompositionr_   excr`   fx_passes.joint_graphra   fx_passes.post_gradrb   rc   fx_passes.pre_gradrd   r   re   irrf   rg   output_coderh   triton_bundlerri   rj   rk   rl   rm   rn   ro   rp   rq   rr   virtualizedrs   collections.abcrt   ru   rv   
torch._opsrw   )torch.export.pt2_archive._package_weightsrx   ry   rz   r{   r  r   r   torch._inductor.fb.utilstypes&torch._functorch._aot_autograd.schemasr   r   r   Enumr   r   r   r   _fx_compile_configr   r  r   r  r   r  r   r   r   _logginggetArtifactLoggerr  r  r  r#  r  r   r   r   r   	lru_cacher   cacher   r#  rG  rQ  rS  r`  rd  r}  r  r  r  r  r  contextmanagerr  r  r  r  r  r`  re  rr  r  r%  r)  r-  r3  r  rN  r  r|  r  r  r  r  r  r  r  r  r  rV  r  r  r  r  r)  r   r   r   <module>rp     s	   "     	    	 
   # # - !     I I U U  $  $ $ A  ;  D =
 
 
 8  O N  B  >
 
 
 @ + ?   W ; & / 2 5 : % &  U  .  5 B /   ' I )
 
 
  3:%A$ t_T](((*% L DII    CD ./ $))%// +;; g!00<Hnn66xARS ~~77BTU NN44'  ~~77BTU 
4A*'=  T/ / 	
 	
E/PJ	J%J8FJJ\ 38!!+/!!0NN'N N( 38+/	(	+ "15FJ	E(E(E( /E( D	E(
 (E(P*('('(.	(	(!%	('	( ).' "& &	@ <@(8((  y  +
+
'+
 '+
 	+
\ 23EE'E -E 	E 4EP
B B# #4o) od-Y-Y'-Y
 #-Y --Y -Y`  $   J (*) +-.0*,))$) 	)
 &) ) ) () ,) () )XU		  
	 (*\M\M\M %\M &	\MD )9/3	?+?+$?+ &?+ -	?+
 8?+D qc#c+c c 	c
 &c c c %c 6cL&$''"' ' %	'T $% % %: 0@kk'k  k 	k
 /k -k k kd 0@	++'+ /+ -	+
 +\//*=//j 0@/3EI"y9y9(y9 -y9 -	y9
 Cy9 y9 Vy9x	$ # 	4 # 	,)D (,R(
 )-R(R(
+R( %R(
 &R( &R(r   