
    ɯwg                      T   d dl Z d dlZd dlZd dlZd dlmZmZmZmZ d dl	Z	d dl
mZ d dlmc mZ d dl	mZ d dl
mZmZ d dlmZ ej*                  j-                  d      duZej.                   G d d             Z G d	 d
      Zdej4                  fdZd Zdej4                  defdZy)    N)AnyCallableDictList)fx)computationdebug)tensor_factory_functionsdebug_extract_compiled_graphc                   L    e Zd ZU dZeeef   ed<   ee   ed<   ee   ed<   d Z	y)GraphInputMatchera  
    The GraphInputMatcher class setup the graph inputs for future calls after lazy tracing.
    Specifically, those graph inputs corresponding to method parameters should be replaced with the
    arguments for the current call.

    tensor_id_to_arg_idx maps the tensor id to the parameter index.
    graph_input_tensor_ids, graph_input_ivalues list the tensor_id and ivalue for each of the
    TS/XLA graph inputs.
    tensor_id_to_arg_idxgraph_input_tensor_idsgraph_input_ivaluesc                     g }t        | j                  | j                        D ]<  \  }}| j                  j	                  |d       }||}n||   }|j                  |       > |S N)zipr   r   r   getappend)selfargs
real_input	tensor_idtraced_ivaluearg_idxinps          g/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/torch/_lazy/extract_compiled_graph.py__call__zGraphInputMatcher.__call__)   st    
(+'')A)A)
 	#$I} //33ItDG#7mc"	#     N)
__name__
__module____qualname____doc__r   int__annotations__r   r   r    r   r   r   r      s2     sCx.( I% c"r   r   c                       e Zd ZdZd Zd Zy)ReturnValueHandlera  
    When ltc_sync_multi is called on multi tensors, the compiled graph
    will contain output only for unique tensors - if a tensor appears multiple
    times in the input to _ltc_sync_multi, only the first occurance matters.

    However from python level, we still expect multi tensors returned with duplciation
    even if the TS graph dedup the output. e.g. for method:

      def forward(self, a):
        return a, a

    the TS graph captured by LTC will return a single tensor, but Python method expects 2.

    This class dedup the lazy tensors first to get the index that will be used
    to duplicate the eager tensors later.
    c                 R   g | _         t        |      | _        i }t        |      D ]  \  }}|j	                  t        |      d       }|| j                   |   j                  |       Bt        | j                         }| j                   j                  |g       ||t        |      <    y r   )indexlentotal_count	enumerater   idr   )r   lazy_out_listtensor_id_to_idxdup_idxlazy_tensoruniq_idxs         r   __init__zReturnValueHandler.__init__I   s    &(
}-+-$-m$< 	= G['++B{OTBH#

8$++G4tzz?

!!7),4< K1	=r   c                     d g| j                   z  }t        |      t        | j                        k(  sJ t        |      D ]  \  }}| j                  |   D ]  }|||<   	   |S r   )r,   r+   r*   r-   )r   eager_tensor_listduplicated_listr3   eager_tensorr1   s         r   duplicate_eager_tensorsz*ReturnValueHandler.duplicate_eager_tensorsW   st    &4#3#33$%TZZ888&/0A&B 	8"Hl::h/ 8+7(8	8 r   N)r    r!   r"   r#   r4   r9   r&   r   r   r(   r(   7   s    "=r   r(   modelc           	         d d }| j                   j                  D ]  }t        fd|j                  D              |_        |j                  j                         D ci c]  \  }}| |       c}}|_        |j                  t        v sr ||j                  |j                        rt        |j                        }t        j                  d      |d<   ||_         | j                          yc c}}w )z
    Factory methods in a Fx graph may create tensors for a specific eager devices.
    If we take no actions, those eager tensors will be mixed with lazy tensors and
    cause crash. This method overwrite those eager device to lazy device.
    c                 |    t        | t        j                        r!t        j                  d| j                        S | S )Nlazy)r*   )
isinstancetorchdevicer*   )devs    r   tolazydevicez'force_lazy_device.<locals>.tolazydeviceh   s*    c5<<(<<cii88
r   c                 j    t        d t        j                  | |j                               D              S )Nc              3   P   K   | ]  }t        |t        j                           y wr   )r>   r?   r@   ).0args     r   	<genexpr>z:force_lazy_device.<locals>.hasDeviceArg.<locals>.<genexpr>n   s#      
 sELL)
s   $&)any	itertoolschainvalues)r   kwargss     r   hasDeviceArgz'force_lazy_device.<locals>.hasDeviceArgm   s-     
 tV]]_=
 
 	
r   c              3   .   K   | ]  } |        y wr   r&   )rE   rF   rB   s     r   rG   z$force_lazy_device.<locals>.<genexpr>t   s     =cS)=s   r=   r@   N)graphnodestupler   rL   itemstargetr
   dictr?   r@   	recompile)r:   rM   ndkvrL   rB   s         @r   force_lazy_devicerY   a   s    

 kk =RWW==46IIOO4EFDAqQQ'F	 9900GGRYY:
 "))_F$||F3F8BI), 
OO) Gs   !C5c                      g } t        j                         D ]A  }d|vrt        t        j                  |            }|dkD  s,| j	                  | d|        C | S )Nzaten::r   =)metricscounter_namesr$   counter_valuer   )fallback_opsopnamevals      r   get_fallback_opsrb      sg    L'') 36!'''/076(!C5 123 r   returnc                 ^   |D cg c]  }|j                  d       }}|D cg c]  }t        j                  |       }}t        |      D ci c]  \  }}||
 }}}t	        j
                  |       j                  t        j                  d            }	t        |	       t        j                           |	| }
t               }t        j                          t        |      dkD  rt        ddj                  |             t        |
t         t"        f      s|
f}
t!        |      t!        |
      z   t%              t&        r6t)        d| j*                         t)        dt-        j.                  d             t1        j2                        \  }}t        |      t        |      k(  sJ t5        |||      t1        j6                        t&        r&t)        d	       t)        d
|        t)        d|       t        j8                  g        fd}|S c c}w c c}w c c}}w )z
    Optimize an eager model with LTC and returns a wrapper to execute the
    compiled graph directly without retracing. It depends on other mechanisms
    like TorchDynamo guards to guarantee the returned wrapper is only called
    when it's safe.
    r=   )r@   r   z7Fail to extact the compiled graph because of fallback: ,z	Fx code:
zLTC IR:text
graph_hashzargs_tensor_ids ztensor ids from device data:c                  (   t              dk(  ry |       }j                  t        j                  |            }t        |      t              k(  sJ t	        |       D ]!  \  }}|||   us|j                  ||          # |t        |       d  S )Nr   r&   )r+   r9   r   run_cached_graphr-   copy_)	r   graph_inputresirF   args_and_outrg   graph_input_matcherreturn_value_handlers	        r   optimized_modz-extract_compiled_graph.<locals>.optimized_mod   s    |!)$/"::(([A
 3x3|,,,,o 	"FAs#a& 		#a&!	" 3t9;r   )tor=   get_tensor_idr-   copydeepcopyr?   r@   rY   r\   resetrb   r+   RuntimeErrorjoinr>   rQ   listr(   r	   printcode
lazy_debugdump_irr   get_tensors_ts_device_data_noder   get_graph_hash
sync_multi)r:   example_inputsrF   	lazy_argslazy_argargs_tensor_idsrm   r   r   
lazy_modellazy_outr_   r   r   rq   rn   rg   ro   rp   s                  @@@@r   extract_compiled_graphr      s    3AA3v&AIADMNt))(3NON=F=WX\Q	IqLXXu%((V0D(EJj! MMO9%H#%LMMO
<1Echh|F\E]^
 	
 h.;#eHo5L-l;lEJJ'i++L&AB 	33LA%&#.A*BBBB+46I ++L9JlJ'  123,.DE
 	OOL"% " E BNXs   HH$H))rt   dataclassesrI   ostypingr   r   r   r   r?   torch._lazy_lazyr=   torch._lazy.metricsr\   r   r   r	   r|   $torch._lazy.tensor_factory_functionsr
   environr   	dataclassr   r(   GraphModulerY   rb   r   r&   r   r   <module>r      s       	 , ,   % %  8 I 	

56dB      F' 'T(R^^ (V	I".. IX Ir   