
    ɯwgcN                        d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
mZmZmZ d dlmZmZ d dlmZmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' d dl(m)Z)m*Z* d dl+m,Z,m-Z- d dl.m/Z/ erd dl0m1Z1  ee2d      Z3 ee2d      Z4d Z5de6ddfdZ7d Z8d Z9 G d d      Z:da;da<e jz                  d        Z>e jz                  d        Z?ddZ@y)    N)AnyDictListOptionalTYPE_CHECKINGUnion)call_backward	call_hookFakeCompiledAutogradEngineGetItemSourceLocalSource)counterslazy_format_graph_codeset_locals_to_steal)getArtifactLoggertrace_structuredclone_preserve_strides)FakeTensorMode)GraphModule)BackwardState)	decomposedisable_autocast_cachedisable_proxy_modes_tracingfetch_object_proxyProxyTorchDispatchModePythonKeyTracertrack_tensor_tree)
DimDynamicShapeEnv)preserve_node_metaset_stack_trace)CapturedTraceback)Proxycompiled_autogradcompiled_autograd_verbosec                  h    t         j                  j                  j                  j	                  d      S )Nr'   )torch_logging	_internal	log_stateis_artifact_enabled     d/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/torch/_dynamo/compiled_autograd.py snapshot_verbose_logging_enabledr1   )   s(    >>##--AA# r/   msgreturnc                 .    t         j                  |        y N)verbose_logdebug)r2   s    r0   cpp_verbose_log_fnr8   /   s    cr/   c                  ^    t         j                  j                  j                  j                  S r5   )r)   	_inductorconfigtriton
cudagraphsr.   r/   r0   snapshot_cudagraph_enabledr>   3   s    ??!!((333r/   c                      | t        |       S | S r5   r   )xs    r0   maybe_clonerA   7   s    }%a((Hr/   c            	       "   e Zd ZddZd Zedefd       Zdee	j                     dee   deeeef      fd	Zd
efdZd ZdefdZd Zd Zd Zdee   fdZd Zd Zd Zd Zd ZdefdZdededee	j<                  j>                     fdZ y)AutogradCompilerInstancer3   Nc                 F   || _         t        j                         | _        | j                  j                  | _        t               | _        t        dd| j                        | _        t               | _
        t        | j                  d      | _        d | _        g d| _        y )NT)allow_fallback_kernelsallow_non_fake_inputs	shape_envsymbolic)inputssizesscalarshooks)compiler_fn
contextlib	ExitStackstackcloser!   rG   r   fake_tensor_moder   	fx_tracerr   
proxy_modehooks_proxygraph_placeholders)selfrM   s     r0   __init__z!AutogradCompilerInstance.__init__>   s|    &))+
ZZ%%
! .#'"&nn!

 )*0L,0"Ir/   c                 t    t        |t        j                        sJ | j                  j	                  ||      S )N)source)
isinstancer)   TensorrR   from_tensor)rW   r@   rZ   s      r0   	wrap_fakez"AutogradCompilerInstance.wrap_fakeM   s2    !U\\***$$0060BBr/   c                 ,    t        t        |       |      S r5   r   )nameidxs     r0   rZ   zAutogradCompilerInstance.sourceQ   s    [.44r/   rI   rJ   rK   c                 ,    t         d   dxx   dz  cc<   d  _        i  _        t        j                  j                          j                  _        t        j                  j                  t               j                  _        i  j                  _         fd j                  D        \  }}} _        t        |      D cg c]'  \  }} j!                  | j#                  d|            ) }}} j%                  ||       t        |      D 	cg c]@  \  }}	 j&                  j)                  |	 j#                  d|      t*        j,                        B }}}	 j%                  ||       t        |      D ]  \  }}	 j#                  d|      }
t/        |	t0              r/ j&                  j)                  |	|
t*        j,                        ||<   Wt/        |	t2              rL j&                  j5                   j&                  j7                  |	|
t*        j,                  	      |	|

      ||<   t9        dt;        |	              j%                  ||        j<                  j?                  tA        i               j<                  j?                   jB                          j<                  j?                   jD                          j<                  j?                  tG                       j<                  j?                  tI                      |||fS c c}}w c c}	}w )Nr&   captures   )
tracer_clsc              3   Z   K   | ]"  }j                   j                  d |di        $ yw)placeholderr.   N)rS   create_proxy).0r`   rW   s     r0   	<genexpr>z9AutogradCompilerInstance.begin_capture.<locals>.<genexpr>a   s0      D
 NN''tRDD
s   (+rI   rJ   rK   )rZ   dynamic_dim)hintrZ   zUnexpected scalar type: )%r   aot_graph_cls_nameaot_graph_infosr)   nnModulerS   rootfxGraphr   graphtensor_attrsrV   rU   	enumerater^   rZ   bind_tensors_to_proxiesrG   $create_unspecified_symint_and_symbolr    DYNAMICr[   intfloatcreate_symfloatnodecreate_unspecified_symbolAssertionErrortyperP   enter_contextr   rR   rT   r   r"   )rW   rI   rJ   rK   
args_proxysizes_proxyscalars_proxyra   r@   valrZ   s   `          r0   begin_capturez&AutogradCompilerInstance.begin_captureU   s    	$%j1Q6115:<#hhoo/$xx~~~I&(#D
//D
@
K0@ $F+
Q NN1dkk(C89
 
 	$$VZ8 &e,
 S NN??GS)""
 
 	$$UK8!'* 	LHC[[C0F#s##~~RR&& 
 C'#~~AANN<<%$.$6$6 = 
 !  B   %%?cKK'	L( 	$$Wm< 	

  2/

  !6!67

  1

  !7!9:

  !3!56ug%%[

s   ,L
ALbackward_idxc           
         | j                   J | j                   |   }| j                  j                  dt        || j	                  |      g| j	                  |      i       }t               5  g }|D ]E  }||j                  d        |\  }	}
}}|j                  t        j                  |||	|
             G | j                  ||       d d d        t        |      S # 1 sw Y   t              S xY w)Ncall_function)kindtargetargskwargs)sizedtypelayoutdevice)rU   rS   rh   r	   to_proxyr   appendr)   emptyrw   tuple)rW   rI   output_metadatassaved_tensorsr   backward_c_functionproxiesgrad_insoutput_metadatar   r   r   r   s                r0   proxy_call_backwardz,AutogradCompilerInstance.proxy_call_backward   s	    +++"..|<..--  #m, v&
  . 	
 )* 	<57H#3 "*OOD).=+tKKTvfU ((7;	< X	< Xs   .AC  C3c                     | j                   j                  dt        |g|D cg c]  }| j                  |       c}|      S c c}w )Nr   )rS   rh   r
   r   )rW   hookr   r   r@   s        r0   proxy_call_hookz(AutogradCompilerInstance.proxy_call_hook   sM    ~~**,01q$--"1 
 	

 2s   Aic                     | j                   J | j                   |   }| j                  |||   d      }t               5  t        ||         ||<   | j	                  ||   g|g       d d d        |S # 1 sw Y   |S xY w)Ntensor_pre_hook	hook_typerU   r   r   rA   rw   )rW   rI   hook_idr   r   proxys         r0   r   z(AutogradCompilerInstance.tensor_pre_hook   s    +++($$1I' % 

 )* 	?#F1I.F1I((&)ug>	? 	? s   )A22A<c                    | j                   J | j                   |   }| j                  ||d      }t               5  |D cg c]  }t        |       }}| j	                  ||       d d d        |S c c}w # 1 sw Y   |S xY w)Npre_hookr   r   )rW   rI   r   r   r   r@   s         r0   r   z!AutogradCompilerInstance.pre_hook   s    +++(&&  ' 

 )* 	:.45k!n5F5((9	:  6	: s   A6A1A61A66B c                    | j                   J | j                   |   }| j                  |||d      }t               5  |D cg c]  }t        |       }}| j	                  ||       d d d        |S c c}w # 1 sw Y   |S xY w)N	post_hookr   r   )rW   outputsrI   r   r   r   r@   s          r0   r   z"AutogradCompilerInstance.post_hook   s    +++(&&!	 ' 
 )* 	;/67!{1~7G7((':	;  8	; s   A7A2A72A77Bc                    t        |t        j                        sJ | j                  J | j                  |   }| j	                  ||d      }t               5  t        |      g}| j                  ||g       d d d        |S # 1 sw Y   |S xY w)Npost_acc_grad_hookr   )r[   r)   r\   rU   r   r   rA   rw   )rW   inputr   r   r   s        r0   r   z+AutogradCompilerInstance.post_acc_grad_hook   s    %...+++($$* % 

 )* 	9 '(E((8	9 	9 s    BBc                    i }d}t        |j                        }|d   j                  dk(  sJ |d   }t        |j                  j	                               }t        | j                        }||   |d   k(  sJ |t        |      z   dz
  }||   |d   k(  sJ t        |      D ]  \  }	}
|s)|
j                  d   j                  j                  dk(  rd}1|
j                  d   j                  j                  d	k(  }t        |
j                  d   j                               dk(  }|s|st        |
j                  j	                               }t        d
 |D              s|
||	<    |rX|j                         D ],  }
|
j                  d   j                         |
j                  d<   . t        |j	                               S g S )NFr   rI   rd   r   cudaTcpuc              3      K   | ]L  }t        |j                  t        j                  j                        xr |j                  j
                  d v  N yw))primsatenN)r[   r   r)   _ops
OpOverload	namespace)ri   users     r0   rj   zDAutogradCompilerInstance.move_graph_nodes_to_cuda.<locals>.<genexpr>  sN        t{{EJJ,A,AB C--1BBCs   AA)listnodesr   userskeyslenrV   rv   metar   r   r   allvaluesr   )rW   rt   to_movehas_cuda_inputsr   rI   inputs_usersfirst_getitem_idxlast_getitem_idxr   nodeis_cpu	is_scalar
node_userss                 r0   move_graph_nodes_to_cudaz1AutogradCompilerInstance.move_graph_nodes_to_cuda   s   ,.U[[!Qx(***qFLL--/0 7 78&'<?:::,s</@@1D%&,r*:::: . 	&GAt"tyy'7'>'>'C'Cv'M"&YYu%,,11U:FDIIe,11349I)!$**//"34
  !+  "&GAJ	&& ( ;#'99U#3#8#8#:		% ; ''	r/   c           	         | j                   j                  dt        j                  di        | j                  j                          | j                   j                  dd| j                   j                  | j                  |            fi        | j                          | j                          g t               r%| j                  | j                   j                        t        | j                   j                  | j                   j                  d      t!        dg       t#        dddd      }t$        j'                  d	|       t(        j+                  d	|       t-        d
fd       fd}|| j/                        fS )Nr   r.   outputCompiledAutogradrI   zCompiled autograd graphT)include_deviceinclude_stridecoloredz%scompiled_autograd_graphc                  (     j                  d      S )NF)print_output)print_readable)rt   s   r0   <lambda>z6AutogradCompilerInstance.end_capture.<locals>.<lambda>J  s    u333G r/   )
payload_fnc                     	 da D ](  }||   j                         j                  d      ||<   *  | ||||      da S # da w xY w)NT)non_blockingF)in_compiled_autograd_region
pin_memoryr   )compiled_fnrI   rJ   rK   rL   r   runtime_inputs_to_moves         r0   runtime_wrapperz=AutogradCompilerInstance.end_capture.<locals>.runtime_wrapperM  s`    4.2+/ OA &q	 4 4 6 ; ; ; NF1IO #65'5A.3+e+s	   9? A)rS   rh   r   _exec_final_callbacks_stubrP   rQ   create_node
create_argr   rename_aot_dispatcher_nodesreorder_accumulate_grad_nodesr>   r   rt   r   rq   r   r   compiled_autograd_loginfor6   r7   r   rM   )rW   r   lazy_graph_coder   rt   r   s       @@r0   end_capturez$AutogradCompilerInstance.end_capture'  sM   ##&AA		
 	

""^^&&t}}W'=>@		
 	((***,,.%'%)%B%B4>>CWCW%X"NN!5!57I
 	EH:.0%
 	""49$0%G	

		4  0 0 777r/   c           	         | j                   ydt        j                  j                  j                  dt        j                  j                  j                  fd}| j
                  j                         D ]  \  }}|d   }|d   }|d   j                  }t        |j                        }t        |      }|J 	 |j                  dk7  rt        |      }|j                  dk7  r	 t        | j                  j                  j                        }	t        |      D ]  }
t        |	        t        |	      }|j                  d	k7  r- |||      s$t        |	      }|j                  d	k7  r
 |||      s$|j                  d	k7  r|j                  d	k7  r|j                  st        |	      }6 |||      st        d
| d|j                    |_        t#        |j$                        D ])  \  }}d
| d|j                    |j$                  |   _        + t        |      }t        |	      }|j                  d	k7  r|j                  d	k7  r y# t        $ r Y w xY w# t        $ r' t&        j)                  d| j                   |||       Y w xY w)z
        Renames nodes as they appear in the AOTDispatcher backward graphs, prefixed by AOT id
        e.g. AOTDispatcher backward graph X's `sin_Y` -> `aotX_sin_Y`
        Nabc                    | j                   |j                   k(  }|s]t        | j                   d      xrE t        |j                   d      xr- | j                   j                  |j                   j                  k(  }|xra | j                  |j                  k(  xrF | j                  |j                  k(  xr+ t        | j                        t        |j                        k(  S )N__name__)r   hasattrr   opr   r   all_input_nodes)r   r   target_matchs      r0   
is_similarzHAutogradCompilerInstance.rename_aot_dispatcher_nodes.<locals>.is_similarb  s    88qxx/LAHHj1 ?*5?))QXX->->>   EDDADDLEFFaff$E ))*c!2C2C.DD	r/   ca_node_start_idxaot_idaot_gmr   r   aot_zIFailed to match %s%s (NodeCall %s) nodes with AOT backward graph %s nodes)rm   r)   rr   r   Nodern   itemsrt   iterr   nextr   StopIterationrS   ranger   r`   rv   r   r6   r7   )rW   r   nodecall_indexr   r   r   	aot_graphaot_itaot_nodeca_itr   ca_noder   inps                 r0   r   z4AutogradCompilerInstance.rename_aot_dispatcher_nodesZ  se   
 ""*	%((--,, 	1C1C 	 %)$8$8$>$>$@ 5	 ND $%8 9(^FX,,I )//*FF|H'''kk_4#F|H kk_4
&T^^1177801  AK u+ jjH,Z5R #5kG jjH,Z5R kkX-'**2H"=="&u+ %h8 ,+%((--#AGL"+H,D,D"E T3<?xq
:S//27T  $F|H"5kG! kkX-'**2H;5	 ! D ! !!_++"s,   9)I$B
I/CI	II,JJc                 >   | j                   j                  j                  dt        j                  j
                  j                  j                        D ]G  }t        |j                        }||j                  us'|j                  dk7  s7|j                  |       I y)a  
        Usage of AOTAutograd causes all the accumulate_grad_ nodes to get pushed to the end of
        the graph.  This differs from eager mode, which schedules them as soon as possible. This
        pass attempts to reorder the graph to mimic eager behavior.
        r   )r   r   rg   N)rS   rt   
find_nodesr)   opsinductoraccumulate_grad_defaultmaxr   prevr   r   )rW   r   args      r0   r   z6AutogradCompilerInstance.reorder_accumulate_grad_nodes  sz     NN((33uyy'9'9'J'J'R'R 4 
 	!D dii.C$))#-(?

4 	!r/   c                     |y t        |t              r|D cg c]  } j                  |       c}S t        |t              rt         fd|D              S t        |t        j
                        sJ t         j                  |      }t        |t        j                  j                  j                  j                        sJ |j                  S c c}w )Nc              3   @   K   | ]  }j                  |        y wr5   )r   )ri   r@   rW   s     r0   rj   z4AutogradCompilerInstance.to_proxy.<locals>.<genexpr>  s     5aq)5s   )r[   r   r   r   r)   r\   r   rS   rr   experimentalproxy_tensor_ProxyTensorr   )rW   tr@   r  s   `   r0   r   z!AutogradCompilerInstance.to_proxy  s    9a./0DMM!$00a51555!U\\***)$..!<,(=(=(J(J(W(WXXX!!! 1s   Cc                    t        |t        j                  j                        r$t	        t        |            D cg c]  }||   	 }}t        |      t        |      k(  sJ t        ||d | j                         y c c}w N)constanttracer)r[   r)   rr   r%   r   r   r   rS   )rW   tensorsr   r   s       r0   rw   z0AutogradCompilerInstance.bind_tensors_to_proxies  sb    guxx~~.+0W+>?awqz?G?7|s7|+++'7T$..Q @s   A<indexc                     | j                   J | j                   |   }t               }t        ||d | j                         |S r  )rU   r   r   rS   )rW   r  r   bw_states       r0   bind_backward_statez,AutogradCompilerInstance.bind_backward_state  sB    +++  ' ?(EDPr/   	node_namer   pyobjc                    d}|y|j                   }t        |d      ra|| _        |j                  }t	        | j
                  j                  j                        ||j                  j                  d| j                  |<   | | d| d}t        j                         j                         d   }|j                  d|      }t        |       y )N _aot_id)r   r   r   z (NodeCall )r   z:raw_stack_trace = CapturedTraceback.extract().format()[-1])_forward_clsr   rm   r   r   rS   rt   r   _lazy_backward_info	bw_modulern   r$   extractformatreplacer#   )	rW   r  r   r  maybe_aot_idforward_clsnew_coderaw_stack_tracenew_stack_traces	            r0   set_node_originz(AutogradCompilerInstance.set_node_origin  s     ,,K{I.*3'*22),T^^-A-A-G-G)H*)==GG8$$^4  [k.9IK+335<<>rB)11H(
 	(r/   r3   N)!r   
__module____qualname__rX   r^   staticmethodr   rZ   r   r)   r\   rz   r   r{   r   r   r   r   r   r   r   r   r   r   r   r   rw   r  strr   autogradFunctionr-  r.   r/   r0   rC   rC   =   s    JC 5] 5 5?&U\\"?& Cy?& eCJ'(	?&B!
 !F	
# &&c &P18fL\!"R )) ) //0	)r/   rC   Fc              #     K   t         j                  j                  j                  j	                  t        j                  t        |             }t               r7t         j                  j                  j                  j                  t               da	 t         j                  j                  d      5  d  d d d        |sdat         j                  j                  j                  j	                  |       y # 1 sw Y   AxY w# |sdat         j                  j                  j                  j	                  |       w xY ww)NTF)r)   _C_dynamor&   set_autograd_compiler	functoolspartialrC   r1   set_verbose_loggerr8   compiled_autograd_enabledr3  set_multithreading_enabled)rM   priors     r0   enabler?    s     HH..DD2K@E ()**==>PQ $H^^66u= 		 (-%**@@G	 	 (-%**@@Gs6   BD>D 1C66D >8D>6C?;D 9D;;D>c               #   f  K   t         j                  j                  j                  j	                  d       } da	 d  | rdat         j                  j                  j                  j	                  |        y # | rdat         j                  j                  j                  j	                  |        w xY ww)NFT)r)   r6  r7  r&   r8  r<  )r>  s    r0   disablerA    s     HH..DDTJE %H(,%**@@G (,%**@@Gs   6B1A5 8B159B..B1c                      d} t         rJ t        j                  j                  j                  j                  d        t        j                  j                  j                  j                  d        y )NF)r   r)   r6  r7  r&   r8  r;  )compiled_autograd_enables    r0   resetrD    sM    $***	HH&&<<TB	HH&&99$?r/   r.  )ArN   r9  typingr   r   r   r   r   r   r)   torch._dynamo.external_utilsr	   r
   r   torch._dynamo.sourcer   r   torch._dynamo.utilsr   r   r   torch._loggingr   r   torch._prims_commonr   torch._subclassesr   torch.fxr   %torch.fx.experimental._backward_stater   "torch.fx.experimental.proxy_tensorr   r   r   r   r   r   r   %torch.fx.experimental.symbolic_shapesr    r!   torch.fx.tracebackr"   r#   torch.utils._tracebackr$   torch.fx.proxyr%   r   r   r6   r1   r2  r8   r>   rA   rC   r<  r   contextmanagerr?  rA  rD  r.   r/   r0   <module>rT     s      B B  
 < U U > 6 ,   ?   G B 4 $ *(4GH *EFC D 4j) j)\ "  $  H H" 	H 	H@r/   