
import itertools
from contextlib import contextmanager, nullcontext
from functools import partial, wraps
from typing import Any, Callable, Dict, List, NewType, Optional, Tuple
from unittest.mock import patch

import torch
import torch._dynamo.logging
import torch.nn as nn
import torch.utils._pytree as pytree
import torch.utils.dlpack
from torch import Tensor
from torch._decomp.decompositions_for_rng import PhiloxStateTracker, rng_decompositions
from torch._dispatch.python import enable_python_dispatcher
from torch._dynamo import compiled_autograd
from torch._dynamo.utils import dynamo_timed, preserve_rng_state
from torch._guards import detect_fake_mode
from torch._inductor.utils import BoxedBool
from torch._subclasses import FakeTensor, FakeTensorMode
from torch.fx.experimental.proxy_tensor import make_fx
from torch.fx.experimental.symbolic_shapes import ShapeEnv
from torch.utils._python_dispatch import is_traceable_wrapper_subclass

static_inputs_log = torch._logging.getArtifactLogger(
    __name__, "cudagraph_static_inputs"
)

from . import config
from ._aot_autograd.autograd_cache import AOTAutogradCache, autograd_cache_key
from ._aot_autograd.collect_metadata_analysis import (
    run_functionalized_fw_and_collect_metadata,
)
from ._aot_autograd.functional_utils import (
    _check_if_mutation_can_be_in_graph,
    are_all_mutations_hidden_from_autograd,
    are_all_mutations_under_no_grad_or_inference_mode,
    assert_functional_graph,
    from_fun,
    gen_alias_from_base,
    has_data_mutation,
    has_metadata_mutation,
    is_fun,
    sync_functional_tensor,
    to_fun,
)
from ._aot_autograd.input_output_analysis import (
    _tensors_definitely_do_not_overlap,
    compute_overlapping_inputs,
    create_graph_signature,
    create_synthetic_base_metadata,
    remove_dupe_metadata,
)
from ._aot_autograd.jit_compile_runtime_wrappers import (
    aot_dispatch_autograd,
    aot_dispatch_base,
    aot_dispatch_export,
)
from ._aot_autograd.logging_utils import (
    callback_set,
    describe_input,
    format_guard_bug_msg,
    get_aot_compilation_context,
    get_aot_graph_name,
    get_graph_being_compiled,
    graph_being_compiled,
    model_name,
    nth_graph,
    set_model_name,
    setup_stacktrace_preservation_hooks,
    track_graph_compiling,
)
from ._aot_autograd.runtime_wrappers import AOTDedupeWrapper, AOTSyntheticBaseWrapper
from ._aot_autograd.schemas import (
    AOTConfig,
    BackwardSignature,
    FQN,
    GraphInputName,
    GraphOutputName,
    GraphSignature,
    InputAliasInfo,
    MutationType,
    OutputAliasInfo,
    OutputType,
    SubclassCreationMeta,
    SubclassMeta,
    TensorAlias,
    ViewAndMutationMeta,
)
from ._aot_autograd.subclass_utils import (
    create_metadata_for_subclass,
    requires_subclass_dispatch,
    unwrap_tensor_subclasses,
    wrap_tensor_subclasses,
    wrap_tensor_subclasses_maybe_joint,
)
from ._aot_autograd.traced_function_transforms import (
    aot_dispatch_subclass,
    create_functional_call,
    create_functionalized_fn,
    create_functionalized_rng_ops_wrapper,
    create_joint,
    fn_input_mutations_to_outputs,
    fn_prepped_for_autograd,
)
from ._aot_autograd.utils import (
    _get_autocast_states,
    _get_symint_hints,
    call_func_at_runtime_with_args,
    create_tree_flattened_fn,
    KNOWN_TYPES,
    make_boxed_compiler,
    make_boxed_func,
    maybe_to_fresh_input,
    normalize_as_list,
    partial_flatten_asdict,
    root_module_when_exporting_non_strict,
    strict_zip,
)
from .partitioners import default_partition

zip = strict_zip

# This global counter increments every time we compile a graph with AOTAutograd.
# You can use it to correlate runtime error messages with compile time.
AOT_COUNTER = itertools.count()

aot_autograd_decompositions = {}

FakifiedFlatArgs = NewType("FakifiedFlatArgs", List[Any])
def process_inputs(
    flat_args: List[Any],
    aot_config: AOTConfig,
    fake_mode: FakeTensorMode,
    shape_env: Optional[ShapeEnv],
) -> FakifiedFlatArgs:
    with fake_mode:

        def convert(idx, x):
            if shape_env is not None:
                from torch._dynamo.source import ConstantSource

                if isinstance(x, int):
                    # We always specialize on scalar values in export.
                    if aot_config.is_export:
                        return x
                    source = ConstantSource(f"sym_{idx}")
                    return shape_env.create_symintnode(
                        shape_env.create_symbol(x, source), hint=x, source=source
                    )
            if isinstance(x, torch.ScriptObject):
                return torch._library.fake_class_registry.maybe_to_fake_obj(
                    fake_mode, x
                )
            if not isinstance(x, torch.Tensor):
                return x
            if isinstance(x, FakeTensor):
                assert x.fake_mode is fake_mode
                return x
            if is_traceable_wrapper_subclass(x):
                attrs, _ = x.__tensor_flatten__()
                if all(isinstance(getattr(x, attr), FakeTensor) for attr in attrs):
                    assert all(
                        getattr(x, attr).fake_mode is fake_mode for attr in attrs
                    )
                    return x

            # If the tensor was already fakeified by Dynamo, reuse the symbolic
            # context and source it recorded so that symbols are shared.
            symbolic_context = None
            source = None
            trace = True
            if tracing_context := torch._guards.TracingContext.try_get():
                if x in tracing_context.tensor_to_context:
                    symbolic_context = tracing_context.tensor_to_context[x]
                    source = symbolic_context.tensor_source
                    trace = False
            if (
                idx < aot_config.num_params_buffers
                and config.static_weight_shapes
                and not symbolic_context
            ):
                # Parameters and buffers are assumed to keep static shapes.
                return fake_mode.from_tensor(x, static_shapes=True)

            return fake_mode.from_tensor(
                x,
                static_shapes=False,
                symbolic_context=symbolic_context,
                source=source,
                trace=trace,
            )

        return FakifiedFlatArgs([convert(idx, x) for idx, x in enumerate(flat_args)])


def construct_fake_mode(
    flat_args, aot_config
) -> Tuple[FakeTensorMode, Optional[ShapeEnv]]:
    fake_mode = detect_fake_mode(flat_args)
    if fake_mode is None:
        shape_env = ShapeEnv() if aot_config.dynamic_shapes else None
        fake_mode = FakeTensorMode(shape_env=shape_env)
    else:
        shape_env = fake_mode.shape_env
    return fake_mode, shape_env


def create_aot_dispatcher_function(
    flat_fn,
    fake_flat_args: FakifiedFlatArgs,
    aot_config: AOTConfig,
    fake_mode: FakeTensorMode,
    shape_env: Optional[ShapeEnv],
) -> Tuple[Callable, ViewAndMutationMeta]:
    with dynamo_timed("create_aot_dispatcher_function"):
        return _create_aot_dispatcher_function(
            flat_fn, fake_flat_args, aot_config, fake_mode, shape_env
        )
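
# Illustrative sketch (not part of the public API): the entry points below
# combine the two helpers above in this order. `my_flat_args` and `cfg` are
# hypothetical stand-ins for a flattened input list and a populated AOTConfig.
#
#     fake_mode, shape_env = construct_fake_mode(my_flat_args, cfg)
#     fake_flat_args = process_inputs(my_flat_args, cfg, fake_mode, shape_env)
#     # Tensors become FakeTensors allocated under `fake_mode`; bare Python ints
#     # become SymInts backed by `shape_env` (unless cfg.is_export, which
#     # specializes on scalars); params/buffers keep static shapes when
#     # config.static_weight_shapes is set.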
def _create_aot_dispatcher_function(
    flat_fn,
    fake_flat_args: FakifiedFlatArgs,
    aot_config: AOTConfig,
    fake_mode: FakeTensorMode,
    shape_env: Optional[ShapeEnv],
) -> Tuple[Callable, ViewAndMutationMeta]:
    """
    Traces the forward and backward graphs of :attr:`flat_fn` to generate a
    joint graph. The joint graph is an Fx graph with Aten ops. Please refer to
    the tracing mechanism to understand the graph capturing details.

    The joint graph is then passed through :attr:`partition_fn` to isolate the
    forward and backward portions, which are then respectively compiled via the
    provided :attr:`fw_compiler` and :attr:`bw_compiler`.

    The resulting compiled forward and backward graphs are then wrapped up in a
    ``torch.autograd.Function`` object.

    The calling convention here is that the first aot_config.num_params_buffers
    inputs in flat_args are parameters and buffers, and the rest are inputs.

    We use this to assume that parameters'/buffers' shapes don't change.

    Note: this function is used both by aot_function and aot_export (controlled by aot_config.is_export)
        When aot_config.is_export is True, we return an FX graph + metadata
        When aot_config.is_export is False, we return an ordinary runtime function
    """
    if aot_config.decompositions is None:
        aot_config.decompositions = {}

    aot_config.decompositions = {
        **aot_autograd_decompositions,
        **aot_config.decompositions,
    }

    if config.functionalize_rng_ops:
        # Update the decompositions with functionalized random decompositions
        aot_config.decompositions = {
            **rng_decompositions,
            **aot_config.decompositions,
        }

    python_dispatcher_mode = (
        enable_python_dispatcher() if shape_env is not None else nullcontext()
    )

    with torch.autograd.set_multithreading_enabled(
        False
    ), preserve_rng_state(), fake_mode, python_dispatcher_mode, PhiloxStateTracker(), torch._dynamo.utils._disable_saved_tensors_hooks_during_tracing():
        from torch._library.fake_class_registry import (
            FakeScriptObject,
            maybe_to_fake_obj,
        )

        # Tracing may mutate the state of a FakeScriptObject, so duplicate the
        # fake script objects first so that subsequent tracing isn't affected.
        def _dup_fake_script_obj(fake_flat_args):
            return [
                maybe_to_fake_obj(detect_fake_mode(fake_flat_args), arg.real_obj)
                if isinstance(arg, FakeScriptObject)
                else arg
                for arg in fake_flat_args
            ]

        needs_autograd = any(
            x.requires_grad for x in fake_flat_args if isinstance(x, Tensor)
        )

        with enable_python_dispatcher():
            # Patch set_rng_state, as set_rng_state with fake tensors is
            # nonsensical. This does not affect the collection of metadata.
            with patch("torch.cuda.set_rng_state", lambda *args: None):
                mod = root_module_when_exporting_non_strict(flat_fn)
                if mod is not None:
                    ctx = _detect_attribute_assignment(mod)
                else:
                    ctx = nullcontext()
                with ctx:
                    fw_metadata = run_functionalized_fw_and_collect_metadata(
                        flat_fn,
                        static_input_indices=aot_config.static_input_indices,
                        keep_input_mutations=aot_config.keep_inference_input_mutations,
                        is_train=needs_autograd,
                        pre_dispatch=aot_config.pre_dispatch,
                    )(*_dup_fake_script_obj(fake_flat_args))

                req_subclass_dispatch = requires_subclass_dispatch(
                    fake_flat_args, fw_metadata
                )

                output_and_mutation_safe = not any(
                    x.requires_grad
                    # View-type operations preserve requires_grad even in no_grad.
                    # Do not count aliases of inputs with requires_grad as a reason
                    # to make a training graph, as AOTAutograd will regenerate the
                    # view outputs at runtime via view-replay.
                    and not (
                        x.output_type
                        in (OutputType.alias_of_input, OutputType.is_input)
                        and fw_metadata.input_info[x.base_idx].requires_grad
                    )
                    for x in fw_metadata.output_info
                ) and not any(
                    x.requires_grad
                    and x.mutates_data
                    and not x.mutations_under_no_grad_or_inference_mode
                    and not x.mutations_hidden_from_autograd
                    for x in fw_metadata.input_info
                )

                if needs_autograd and output_and_mutation_safe:
                    # We realized that none of the outputs require grad,
                    # so we actually have an inference graph.
                    needs_autograd = False
                    # The subclass path assumes the metadata was collected with
                    # the final value of is_train, so recompute it in that case;
                    # otherwise just rebuild the metadata with is_train=False.
                    if req_subclass_dispatch:
                        fw_metadata = run_functionalized_fw_and_collect_metadata(
                            flat_fn,
                            keep_input_mutations=aot_config.keep_inference_input_mutations,
                            is_train=False,
                            pre_dispatch=aot_config.pre_dispatch,
                            static_input_indices=aot_config.static_input_indices,
                        )(*fake_flat_args)
                    else:
                        fw_metadata = ViewAndMutationMeta(
                            input_info=fw_metadata.input_info,
                            output_info=fw_metadata.output_info,
                            num_intermediate_bases=fw_metadata.num_intermediate_bases,
                            keep_input_mutations=aot_config.keep_inference_input_mutations,
                            traced_tangents=fw_metadata.traced_tangents,
                            subclass_inp_meta=fw_metadata.subclass_inp_meta,
                            subclass_fw_graph_out_meta=fw_metadata.subclass_fw_graph_out_meta,
                            subclass_tangent_meta=fw_metadata.subclass_tangent_meta,
                            is_train=False,
                            tokens=fw_metadata.tokens,
                            static_input_indices=fw_metadata.static_input_indices,
                        )

        if fw_metadata.num_intermediate_bases > 0:
            assert not req_subclass_dispatch, f"""\
torch.compile is currently being used with tensor subclass inputs:
{','.join([str(type(x)) for x in fake_flat_args])}. We are attempting to compile a graph with two graph outputs
that alias one another, which is currently unsupported in the subclass use case. If you run into this,
please file a github issue"""

        if aot_config.is_export:
            # aot_export: ban input metadata mutations for now to keep shared
            # code paths simpler, since keeping e.g. .resize_() in the graph
            # would require calling convention changes.
            if len([x for x in fw_metadata.input_info if x.mutates_metadata]) != 0:
                raise RuntimeError(
                    f"""\
Found an input that received a metadata mutation, through e.g. a call to `.resize_()` or `.transpose_()`.
This is currently banned in the aot_export workflow. If you need this functionality, please file a github issue.

fw_metadata={str(fw_metadata)}"""
                )
            # In export, ban data mutations on inputs that require grad for now.
            if (
                len(
                    [
                        x
                        for x in fw_metadata.input_info
                        if x.requires_grad and x.mutates_data
                    ]
                )
                != 0
            ):
                raise RuntimeError(
                    f"""\
Found a graph input that requires gradients, and received a mutation.
This is currently banned in the aot_export workflow. If you need this functionality, please file a github issue.

fw_metadata={str(fw_metadata)}"""
                )
            if req_subclass_dispatch:
                raise RuntimeError(
                    """\
aot_export is not currently supported with traceable tensor subclass.
If you need this feature, please comment on <CREATE_ISSUE_LINK>"""
                )
            if config.functionalize_rng_ops:
                raise RuntimeError(
                    """\
Functionalized RNG is not currently supported in the aot_export workflow. Please file a github issue,
or otherwise set torch._functorch.config.functionalize_rng_ops = False."""
                )

        def choose_dispatcher(needs_autograd, aot_config):
            """
            Pick a dispatcher based on the config rules.
            """
            if aot_config.is_export:
                # Export uses just the "graph bits", whereas the other two
                # dispatchers include extra work to handle a runtime epilogue.
                return partial(aot_dispatch_export, needs_autograd=needs_autograd)
            elif needs_autograd and not aot_config.pre_dispatch:
                return aot_dispatch_autograd
            else:
                return aot_dispatch_base

        compiler_fn = choose_dispatcher(needs_autograd, aot_config)

        compiled_fn, fw_metadata = compiler_fn(
            flat_fn,
            _dup_fake_script_obj(fake_flat_args),
            aot_config,
            fw_metadata=fw_metadata,
        )
        return compiled_fn, fw_metadata
def aot_function(
    fn: Callable,
    fw_compiler: Callable,
    bw_compiler: Optional[Callable] = None,
    partition_fn: Callable = default_partition,
    decompositions: Optional[Dict] = None,
    num_params_buffers: int = 0,
    keep_inference_input_mutations: bool = False,
    inference_compiler: Optional[Callable] = None,
    *,
    # Whether or not to trace with dynamic shapes
    dynamic=False,
    enable_log=True,
) -> Callable:
    """
    Traces the forward and backward graph of :attr:`fn` using torch dispatch
    mechanism, and then compiles the generated forward and backward graphs
    through :attr:`fw_compiler` and :attr:`bw_compiler`.

    :func:`aot_function` traces the forward and backward graph ahead of time,
    and generates a joint forward and backward graph.  :attr:`partition_fn` is
    then used to separate out forward and backward graphs. The partitioner
    function can be used to perform optimizations such as recomputation. One can
    set `decompositions` dictionary to decompose the operators into a sequence
    of core or simpler operators supported by the backend compilers.

    .. warning::
        This API is experimental and likely to change.

    Args:
        fn (Callable): A Python function that takes one or more arguments. Must
            return one or more Tensors.
        fw_compiler (Callable): A Python function that accepts an Fx graph with
            Aten ops and input args, and returns a Callable that semantically is
            equivalent to the input Fx graph.
        bw_compiler (Optional[Callable]): A Python function that accepts an
            Fx graph with Aten ops and input args, and returns a Callable that
            semantically is equivalent to the input Fx graph.  Default: None
            (when None, it defaults to the :attr:`fw_compiler`)
        partition_fn (Callable): A Python function that takes a joint forward
            and backward graph, and partitions it into separate forward and
            backward graphs.
        decompositions (Dict): A dictionary to define the decomposition of
            larger Aten ops into simpler or core Aten ops.
        inference_compiler (Optional[Callable]): A Python function that accepts an
            Fx graph with Aten ops and input args, and returns a Callable that
            semantically is equivalent to the input Fx graph. inference_compiler is invoked
            if no autograd is needed. Default: None
            (when None, it defaults to the :attr:`fw_compiler`)
    Returns:
        Returns a ``Callable`` that retains the eager behavior of the original
        :attr:`fn`, but with forward and backward graph compiled via
        :attr:`fw_compiler` and :attr:`bw_compiler`.

    A simple example usage of :func:`aot_function` is as follows. This example
    will print the forward and backward graphs of the function ``fn``

        >>> fn = lambda x : x.sin().cos()
        >>> def print_compile_fn(fx_module, args):
        >>>     print(fx_module)
        >>>     return fx_module
        >>> aot_fn = aot_function(fn, print_compile_fn)
        >>> x = torch.randn(4, 5, requires_grad=True)
        >>> aot_fn(x)
    """
    if bw_compiler is None:
        bw_compiler = fw_compiler
    if inference_compiler is None:
        inference_compiler = fw_compiler
    aot_config = AOTConfig(
        fw_compiler=fw_compiler,
        bw_compiler=bw_compiler,
        inference_compiler=inference_compiler,
        partition_fn=partition_fn,
        decompositions=decompositions,
        num_params_buffers=num_params_buffers,
        aot_id=next(AOT_COUNTER),
        keep_inference_input_mutations=keep_inference_input_mutations,
        dynamic_shapes=dynamic,
        aot_autograd_arg_pos_to_source=None,
        is_export=False,
        no_tangents=False,
        enable_log=enable_log,
    )
    cached_res = None

    @wraps(fn)
    def returned_function(*args, **kwargs):
        nonlocal cached_res
        # Now flatten the tensor args
        flat_args = pytree.arg_tree_leaves(*args, **kwargs)

        # Compile the function and cache the result in case of recompilation
        if cached_res is None:
            flat_fn, out_spec = create_tree_flattened_fn(fn, args, kwargs)
            fake_mode, shape_env = construct_fake_mode(flat_args, aot_config)
            fake_flat_args = process_inputs(
                flat_args, aot_config, fake_mode, shape_env
            )
            compiled_fn, _ = create_aot_dispatcher_function(
                flat_fn,
                fake_flat_args,
                aot_config,
                fake_mode,
                shape_env,
            )
            cached_res = (compiled_fn, out_spec)

        cached_fn, out_spec = cached_res
        out = cached_fn(flat_args)
        return out_spec.unflatten(out)

    return returned_function
def aot_module(mod: nn.Module, *args, **kwargs) -> nn.Module:
    """
    Traces the forward and backward graph of :attr:`mod` using torch dispatch
    tracing mechanism. It is a wrapper function that underneath uses
    :func:`aot_function` to perform tracing and compilation.

    :func:`aot_module` lifts the parameters and buffers of ``nn.Module`` as inputs
    to a new callable which is then compiled through :func:`aot_function`.

    .. warning::
        This API is experimental and likely to change.

    Args:
        mod (Callable): A ``nn.Module`` module.
        args : args to be passed to :func:`aot_function`
        kwargs : kwargs to be passed to :func:`aot_function`

    Returns:
        Returns a ``nn.Module`` that retains the eager behavior of the original
        :attr:`mod`, but with forward and backward graph compiled.
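
    Example (an illustrative sketch, not from the test suite; the identity
    "compiler" below simply returns the traced graph module)::

        >>> mod = torch.nn.Linear(3, 3)
        >>> aot_mod = aot_module(mod, fw_compiler=lambda gm, inputs: gm)
        >>> out = aot_mod(torch.randn(2, 3, requires_grad=True))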
    """
    # See Note: [Fake Modules and AOTAutograd]
    torch._dynamo.utils.assert_no_fake_params_or_buffers(mod)

    def functional_call(named_params, named_buffers, *args, **kwargs):
        params_and_buffers = {**named_params, **named_buffers}
        return torch.func.functional_call(mod, params_and_buffers, args, kwargs)

    named_params = dict(mod.named_parameters(remove_duplicate=False))
    named_buffers = dict(mod.named_buffers(remove_duplicate=False))
    num_params_buffers = len(named_params) + len(named_buffers)
    compiled_f = aot_function(
        functional_call, *args, num_params_buffers=num_params_buffers, **kwargs
    )

    class AOTModule(nn.Module):
        def __init__(self) -> None:
            super().__init__()
            self.orig_module = mod

        def forward(self, *args, **kwargs):
            return compiled_f(
                named_params,
                named_buffers,
                *args,
                **kwargs,
            )

    return AOTModule()
def aot_module_simplified(
    mod: nn.Module,
    args,
    fw_compiler: Callable,
    bw_compiler: Optional[Callable] = None,
    partition_fn: Callable = default_partition,
    decompositions: Optional[Dict] = None,
    keep_inference_input_mutations=False,
    inference_compiler: Optional[Callable] = None,
    cudagraphs: Optional[BoxedBool] = None,
) -> nn.Module:
    """
    This is the simplified or low overhead version of aot_module. For frontends
    like TorchDynamo, the input functions/modules to AOT are static and have
    unpacked inputs/outputs. This gives us an opportunity to remove the
        (1) pytree overhead to parse inputs/outputs,
        (2) AOT Autograd cache,
        (3) Reading of params/buffers in every forward call

    :func:`aot_module_simplified` removes these overheads.
    """
    params = {
        **dict(mod.named_parameters(remove_duplicate=False)),
        **dict(mod.named_buffers(remove_duplicate=False)),
    }
    params_flat, params_spec = pytree.tree_flatten(params)
    params_flat = list(params_flat)
    params_len = len(params_flat)

    if cudagraphs is None:
        cudagraphs = BoxedBool(torch._inductor.config.triton.cudagraphs)

    if bw_compiler is None:
        bw_compiler = fw_compiler
    if inference_compiler is None:
        inference_compiler = fw_compiler

    seen_sources = set()

    full_args = []
    # First, the params
    full_args.extend(params_flat)

    if tracing_context := torch._guards.TracingContext.try_get():
        tracing_context.params_flat = params_flat

    aot_autograd_arg_pos_to_source = None
    # Then, the params' 1:1 mapped sources, if relevant.
    if hasattr(mod, "_param_name_to_source"):
        aot_autograd_arg_pos_to_source = []
        # We now know this came from dynamo, and (1) we care about guards, so
        # setting up aot_autograd_arg_pos_to_source for downstream dedup guards
        # can be done safely. (2) Dynamo logic protects the 1:1 sizing below.
        for name in params.keys():
            assert name in mod._param_name_to_source, f"{name} not found."
            source = mod._param_name_to_source[name]
            assert source not in seen_sources, source
            seen_sources.add(source)
            aot_autograd_arg_pos_to_source.append(source)

    # Next, the input args
    full_args.extend(args)

    static_input_indices = []
    if hasattr(mod, "graph"):
        # Non-dynamo entrypoints can get to here...
        for pos, node in enumerate(mod.graph.find_nodes(op="placeholder")):
            if hasattr(node, "_dynamo_source"):
                # ... but not here!
                if aot_autograd_arg_pos_to_source is None:
                    aot_autograd_arg_pos_to_source = []
                source = node._dynamo_source
                assert source not in seen_sources, source
                seen_sources.add(source)
                aot_autograd_arg_pos_to_source.append(source)
                source_name = source.name() if source else str(source)

                if "tensor_dict" in node.meta and node.meta["tensor_dict"].get(
                    "_dynamo_static_input_type", None
                ):
                    static_inputs_log.debug(
                        "Adding static input pos %s for source %s", pos, source_name
                    )
                    static_input_indices.append(pos)
                else:
                    static_inputs_log.debug(
                        "Non-static input pos %s for source %s", pos, source_name
                    )

    if aot_autograd_arg_pos_to_source is not None:
        assert len(full_args) == len(aot_autograd_arg_pos_to_source)

    dynamic_shapes = False
    for x in full_args:
        if isinstance(x, FakeTensor):
            dynamic_shapes = x.fake_mode.shape_env is not None
            break

    aot_config = AOTConfig(
        fw_compiler=fw_compiler,
        bw_compiler=bw_compiler,
        inference_compiler=inference_compiler,
        partition_fn=partition_fn,
        decompositions=decompositions,
        num_params_buffers=params_len,
        aot_id=next(AOT_COUNTER),
        keep_inference_input_mutations=keep_inference_input_mutations,
        dynamic_shapes=dynamic_shapes,
        aot_autograd_arg_pos_to_source=aot_autograd_arg_pos_to_source,
        is_export=False,
        no_tangents=False,
        cache_key=None,
        static_input_indices=static_input_indices,
    )
    fake_mode, shape_env = construct_fake_mode(full_args, aot_config)
    fake_flat_args = process_inputs(full_args, aot_config, fake_mode, shape_env)

    def dispatch_and_compile():
        functional_call = create_functional_call(mod, params_spec, params_len)
        with compiled_autograd.disable():
            compiled_fn, _ = create_aot_dispatcher_function(
                functional_call,
                fake_flat_args,
                aot_config,
                fake_mode,
                shape_env,
            )
        return compiled_fn

    if config.enable_autograd_cache:
        compiled_fn = AOTAutogradCache.load(
            dispatch_and_compile, mod, args, aot_config, cudagraphs
        )
    else:
        compiled_fn = dispatch_and_compile()

    if isinstance(mod, torch._dynamo.utils.GmWrapper):
        # This function is called by the flatten_graph_inputs wrapper, which
        # boxes the inputs so that they can be freed before this scope ends.
        def boxed_forward(runtime_args: List[Any]):
            flat_args = []
            flat_args.extend(params_flat)
            flat_args.extend(runtime_args)
            runtime_args.clear()
            return compiled_fn(flat_args)

        # Just for convenience
        boxed_forward.zero_grad = mod.zero_grad
        boxed_forward.named_parameters = mod.named_parameters
        boxed_forward.named_buffers = mod.named_buffers
        return boxed_forward

    # The compiled function runs with the boxed calling convention, but
    # aot_module_simplified has historically returned a non-boxed callable.
    def forward(*runtime_args: Tuple[Any]):
        full_args = []
        full_args.extend(params_flat)
        full_args.extend(runtime_args)
        return compiled_fn(full_args)

    # Just for convenience
    forward.zero_grad = mod.zero_grad
    forward.named_parameters = mod.named_parameters
    forward.named_buffers = mod.named_buffers

    return forward
def aot_export_module(
    mod: nn.Module,
    args,
    *,
    decompositions: Optional[Dict] = None,
    # If True, we'll return a joint forward-backward graph,
    # as well as metadata on the loss + gradients in the backward.
    trace_joint: bool,
    # If trace_joint is True, we expect your module to return a scalar loss.
    # Your module can return multiple outputs, so you must specify which output the loss is.
    output_loss_index: Optional[int] = None,
    pre_dispatch: bool = False,
    kwargs=None,
) -> Tuple[torch.fx.GraphModule, GraphSignature]:
    """
    This function takes in a module, and returns:
    (1) an FX graph that can be exported
    (2) some metadata about the graph

    If `trace_joint=True` we will return a joint graph of the forward + backward.

    The traced FX graph will have the following properties compared to the original module:
    (1) Inputs and outputs to the module will be pytree-flattened
    (2) Parameters and buffers on the module will be lifted into graph inputs,
        graph_inputs = (*parameters, *buffers, *user_inputs)
    (3) The graph will be fully functionalized
    (4) Any input mutations will be converted into additional outputs in the graph,
        meaning whoever calls this graph is responsible for applying the mutations
        back to the original inputs.
    (5) If trace_joint is True, the graph will return parameter gradients in addition to user outputs.
        The graph output will look like:
        graph_outputs = (*updated_inputs, *user_outputs, *param_gradients)

    There are also several restrictions on what modules can use this API. In particular:
    (1) If trace_joint is specified, we expect the loss function to be **fused**
        into the module forward. One of the outputs to the forward must be a scalar loss,
        which is specified with `output_loss_index`.
        All other outputs to the forward are presumed to not require gradients.
    (2) This API cannot capture optimizers (although in theory we could build an API for this).
    (3) Metadata mutations on params/buffers/inputs are banned.
    (4) Data mutations on anything that requires gradients are banned (parameters)
    (5) If an input is mutated, it is not allowed to alias any other inputs.
    (6) Parameters must not be duplicated.
    """
    if pre_dispatch and trace_joint:
        raise RuntimeError("pre_dispatch is not supported when trace_joint is True.")
    named_parameters = dict(mod.named_parameters(remove_duplicate=False))
    named_buffers = dict(mod.named_buffers(remove_duplicate=False))

    params_and_buffers = {
        **dict(named_parameters),
        **dict(named_buffers),
    }
    params_and_buffers_flat, params_spec = pytree.tree_flatten(params_and_buffers)
    params_and_buffers_flat = tuple(params_and_buffers_flat)
    params_len = len(params_and_buffers_flat)

    kwargs = kwargs or {}

    functional_call = create_functional_call(
        mod, params_spec, params_len, store_orig_mod=True
    )

    num_fw_outs = None

    if trace_joint:
        # This helper effectively just adds some extra asserts about what the
        # backward will look like: outputs must include a scalar loss that we
        # compute gradients w.r.t., and nothing else may require gradients.
        def fn_to_trace(*args):
            nonlocal num_fw_outs
            out = functional_call(*args)
            if output_loss_index is None:
                raise RuntimeError(
                    """\
If trace_joint=True, it is required that one of your forward outputs must be a scalar loss.
You must specify which output (by index) is the loss with output_loss_index."""
                )
            if isinstance(out, torch.Tensor):
                out = (out,)
            if not isinstance(out, (tuple, list)):
                raise RuntimeError(
                    f"Expected forward output to be either a tensor or a list/tuple of tensors. found {type(out)}"
                )

            for i, o in enumerate(out):
                # We only want to create a backward graph w.r.t. the loss that
                # the user passed in. This implies that every other output
                # should not require gradients.
                if o.requires_grad and i != output_loss_index:
                    raise RuntimeError(
                        f"""\
Found an output of the forward that requires gradients, that was not the scalar loss.
We require all outputs to the forward that are not the scalar loss to not require gradient,
because we will only compute a backward graph against the scalar loss.
You can fix this by calling .detach() on each of your forward outputs that is not the loss.
You specified that output index {output_loss_index} is the loss, but we found that
the output at index {i} requires gradients."""
                    )
            out_loss = out[output_loss_index]
            num_fw_outs = len(out)
            if not out_loss.requires_grad:
                raise RuntimeError(
                    f"""\
The output at index {output_loss_index} was marked as the loss, but it does not require gradients"""
                )
            if out_loss.numel() != 1:
                raise RuntimeError(
                    f"""\
We require the output marked as the loss (at index {output_loss_index}) to be a scalar, but it has shape {out_loss.shape}"""
                )
            return out

        ctx = nullcontext
    else:
        # Run under no_grad, so our tracing machinery only traces an inference graph.
        ctx = torch.no_grad
        fn_to_trace = functional_call

    full_args = []
    # First, the params.
    # NB: It is REQUIRED that parameters come first: Inductor infers "fixed"
    # parameters by looking at the difference in parameter count outside and
    # inside AOTAutograd, and assumes the prefix of arguments are fixed.
    full_args.extend(params_and_buffers_flat)
    # Next, the input args
    full_args.extend(args)

    with ctx():
        fx_g, metadata, in_spec, out_spec = _aot_export_function(
            fn_to_trace,
            full_args,
            decompositions=decompositions,
            num_params_buffers=params_len,
            no_tangents=True,
            pre_dispatch=pre_dispatch,
            kwargs=kwargs,
        )
    if trace_joint:

        def flattened_joint(*args):
            # The joint graph that AOTAutograd creates (1) accepts two
            # arguments (primals, tangents) and pytree-flattens them, and
            # (2) returns a tuple of (fw_outs, gradients). For export we
            # "fix" the graph by removing tangent inputs (the loss is a
            # scalar, so tangents are all ones) and removing pytrees, and
            # by dropping the None gradients AOTAutograd creates for inputs
            # that are not grad-requiring tensors.
            fake_tangents = [
                None
                for _ in range(
                    metadata.num_outputs + metadata.num_mutated_inp_runtime_indices
                )
            ]
            fw_outs, gradients = fx_g(args, fake_tangents)
            assert len(gradients) == len(args)
            output_gradients = []
            for i, (a, grad) in enumerate(zip(args, gradients)):
                if isinstance(a, torch.Tensor) and a.requires_grad:
                    assert (
                        grad is not None
                    ), """\
Found a parameter that did not receive a gradient.
This is most likely a bug, but if this needs to be supported please comment on this Github issue:
https://github.com/pytorch/pytorch/issues/101192
"""
                    output_gradients.append(grad)
                else:
                    assert grad is None
            return *fw_outs, *output_gradients

        fx_g = make_fx(flattened_joint)(*full_args)

    user_args_flat = pytree.arg_tree_leaves(*args, **kwargs)
    return fx_g, create_graph_signature(
        fx_g,
        metadata,
        in_spec,
        out_spec,
        user_args_flat=user_args_flat,
        params_and_buffers_flat=params_and_buffers_flat,
        param_names=list(named_parameters.keys()),
        buffer_names=list(named_buffers.keys()),
        trace_joint=trace_joint,
        num_user_fw_outs=num_fw_outs,
        loss_index=output_loss_index,
    )
def aot_export_joint_simple(
    func: Callable,
    args,
    *,
    trace_joint: bool,
    # It looks like the main consequence of this API is that for dynamic
    # shapes, it will assume that params/buffers are static.
    num_params_buffers: int = 0,
    decompositions: Optional[Dict] = None,
) -> torch.fx.GraphModule:
    """
    A simplified version of export. Used by higher order operators.

    This function makes a high-level "no calling convention changes" guarantee:
    - If no inputs require grad (so we export an inference graph),
      there are *no* calling convention changes between the exported graph and "func".
    - If at least one input requires grad (so we trace out and export a joint fw-bw graph),
      then if you were to partition the graph into a separate forward and backward graph,
      the forward graph will have no calling convention changes compared to "func".

    The above also relies on some strong restrictions around which functions this API accepts:
    (1) `args` cannot contain any pytrees (they must have been pytree_flattened already)
    (2) `func` cannot mutate any inputs
    (3) The outputs of `func` cannot alias any inputs.

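    Example (illustrative sketch; a tiny function with one grad-requiring
    input, so a joint graph is traced)::

        >>> def f(x, weight):
        ...     return (x @ weight).sum()
        >>> x, w = torch.randn(2, 3), torch.randn(3, 3, requires_grad=True)
        >>> joint_gm = aot_export_joint_simple(f, [x, w], trace_joint=True)
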
    Note: this function is only lightly tested today. It will probably be tested more heavily by higher order ops.
    """
    if trace_joint:
        ctx = nullcontext
    else:
        ctx = torch.no_grad

    with ctx():
        fx_g, metadata, in_spec, out_spec = _aot_export_function(
            func,
            args,
            num_params_buffers=num_params_buffers,
            decompositions=decompositions,
        )
    in_spec, _kw_in_spec = in_spec.children_specs
    # At this point, we can just directly return the (joint or inference
    # graph) that we traced. First though: a bunch of assertions to make sure
    # that our graph doesn't require any calling convention changes compared
    # to the original function. These restrictions are *in addition to* the
    # general restrictions on export.

    # No input mutations
    if (
        len([x for x in metadata.input_info if x.mutates_data or x.mutates_metadata])
        != 0
    ):
        raise RuntimeError(
            f"aot_export_joint_simple does not support input mutations. {str(metadata)}"
        )
    # No output aliasing
    if (
        len([x for x in metadata.output_info if x.output_type != OutputType.non_alias])
        != 0
    ):
        raise RuntimeError(
            f"aot_export_joint_simple does not support outputs that alias inputs. {str(metadata)}"
        )
    # No pytrees
    if in_spec.is_leaf():
        raise RuntimeError(
            f"aot_export_joint_simple requires inputs to be a single list/tuple. in_spec={str(in_spec)}"
        )
    if not all(child.is_leaf() for child in in_spec.children_specs):
        raise RuntimeError(
            f"aot_export_joint_simple requires individual inputs not to be pytrees. in_spec={str(in_spec)}"
        )
    if out_spec.is_leaf():
        raise RuntimeError(
            f"aot_export_joint_simple requires outputs to be a single list/tuple. out_spec={str(out_spec)}"
        )
    if not all(child.is_leaf() for child in out_spec.children_specs):
        raise RuntimeError(
            f"aot_export_joint_simple requires individual outputs not to be pytrees. out_spec={str(out_spec)}"
        )

    if config.debug_assert:
        # Smoke test that after partitioning, we can run the forward without
        # any calling convention changes.
        fw_module, bw_module = default_partition(
            fx_g, args, num_fwd_outputs=len(metadata.output_info)
        )
        # Attempt to run the fw_module with the original user inputs
        fake_mode = detect_fake_mode(args)
        if fake_mode is None:
            fake_mode = FakeTensorMode()
        with fake_mode:
            fw_module(*args)
    return fx_g


def _aot_export_function(
    func: Callable,
    args,
    *,
    num_params_buffers: int = 0,
    decompositions: Optional[Dict] = None,
    # If we're exporting a joint graph and we don't want any tangent inputs in
    # the graph (because we are backpropping through a scalar 1 loss), we need
    # to explicitly specify not to include tangents in the graph. It's not
    # enough just to check that our tangent is a scalar, since we also need to
    # know if it is a 1 (no need to make it a graph input) or something else
    # (requiring it to be a graph input).
    no_tangents: bool = False,
    pre_dispatch: bool = False,
    # If None, will be inferred from inputs and mod.graph.nodes if mod is a
    # graph module, but the inferred result might be wrong.
    dynamic_shapes: Optional[bool] = None,
    kwargs=None,
) -> Tuple[torch.fx.GraphModule, ViewAndMutationMeta, pytree.TreeSpec, pytree.TreeSpec]:
    kwargs = kwargs or {}

    flat_fn, out_spec = create_tree_flattened_fn(func, args, kwargs)
    flat_args, in_spec = pytree.tree_flatten((args, kwargs))

    if dynamic_shapes is None:
        # Try to infer dynamic_shapes from inputs and graph nodes
        fake_mode = detect_fake_mode(flat_args)
        if (
            fake_mode is None
            and hasattr(func, "_orig_mod")
            and isinstance(func._orig_mod, torch.fx.GraphModule)
        ):
            vals = [
                node.meta["val"]
                for node in func._orig_mod.graph.nodes
                if "val" in node.meta
            ]
            fake_mode = detect_fake_mode(vals)
        dynamic_shapes = fake_mode is not None and fake_mode.shape_env is not None

    # The export use case doesn't care about several bits of AOTConfig:
    # (1) compilers (we just export the graph)
    # (2) partitioners (export is only a full graph; users can partition themselves)
    aot_config = AOTConfig(
        fw_compiler=None,
        bw_compiler=None,
        inference_compiler=None,
        partition_fn=None,
        decompositions=decompositions,
        num_params_buffers=num_params_buffers,
        aot_id=next(AOT_COUNTER),
        # For now there's no use case involving keeping input mutations in the
        # graph (which we can only do in the inference case anyway).
        keep_inference_input_mutations=False,
        dynamic_shapes=dynamic_shapes,
        aot_autograd_arg_pos_to_source=None,
        is_export=True,
        no_tangents=no_tangents,
        pre_dispatch=pre_dispatch,
    )
    fake_mode, shape_env = construct_fake_mode(flat_args, aot_config)
    fake_flat_args = process_inputs(flat_args, aot_config, fake_mode, shape_env)

    fx_g, meta = create_aot_dispatcher_function(
        flat_fn,
        fake_flat_args,
        aot_config,
        fake_mode,
        shape_env,
    )
    return fx_g, meta, in_spec, out_spec.spec


@contextmanager
def _detect_attribute_assignment(mod: torch.nn.Module):
    # Do not allow assignment of tensor attributes during export unless
    # the attribute is registered as a buffer.

    STD_ATTRS = {
        "_backward_hooks",
        "_backward_pre_hooks",
        "_buffers",
        "_forward_hooks",
        "_forward_hooks_always_called",
        "_forward_hooks_with_kwargs",
        "_forward_pre_hooks",
        "_forward_pre_hooks_with_kwargs",
        "_is_full_backward_hook",
        "_load_state_dict_post_hooks",
        "_load_state_dict_pre_hooks",
        "_modules",
        "_non_persistent_buffers_set",
        "_parameters",
        "_state_dict_hooks",
        "_state_dict_pre_hooks",
        "training",
    }

    def _get_attributes(mod):
        # Return any attributes of a module that are not standard attributes.
        return {k: v for k, v in mod.__dict__.items() if k not in STD_ATTRS}

    # Save the state of attributes before entering.
    snapshot = pytree.tree_map(lambda x: x, _get_attributes(mod))
    try:
        yield
    finally:
        # After exiting, compare the state of attributes with the snapshot
        # to detect which tensor attributes were assigned.
        assigned_tensor_attributes = []

        def _collect_assigned_tensor_attributes(kp, v, _v):
            if _v is not v:
                attr, *rest = kp
                if isinstance(v, torch.Tensor):
                    assigned_tensor_attributes.append(
                        f"self.{attr.key}{pytree.keystr(rest)}"
                    )
                # Assigning all other types is allowed right now.

        pytree.tree_map_with_path(
            _collect_assigned_tensor_attributes, snapshot, _get_attributes(mod)
        )
        # Restore the state of all attributes (including, e.g., primitives).
        mod.__dict__.update(snapshot)

        if assigned_tensor_attributes:
            if len(assigned_tensor_attributes) > 1:
                noun, verb = "attributes", "were"
            else:
                noun, verb = "attribute", "was"
            raise ValueError(
                f"The tensor {noun} {', '.join(assigned_tensor_attributes)} {verb} "
                "assigned during export. Such attributes must be registered as buffers using "
                "the `register_buffer` API "
                "(https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.register_buffer)."
            )


compiled_function = aot_function
compiled_module = aot_module
 !n( 8(()n(h '+.%)+0-1q qqq (#q 	q
 TNq q %)q !*q qh1BII 1299 1p '+.%)#(-1&*k	k k (#	k
 k TNk !*k #k YYkd &* (,%)K	K TN	K K  }K K TNK 588/0Kl  %)]
] 	] ] TN] XX]P  %) %)#G.
G. 	G.
 TNG. G. G.  TN!G.$ 588!4foovVW%G.T >ehhoo > >B ! r   