
    Ǆg
                        d dl Z d dlZd dlZd dlZd dlZd dlmZmZm	Z	m
Z
mZmZmZmZmZmZ d dlmZ d dlZd dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlm Z  d dl!m"Z" ed	e#f   Z$ed
e#f   Z% ejL                  e'      Z( G d dejR                        Z*e*jV                  de*jX                  de*jZ                  de*j\                  de*j^                  de*j`                  de*jb                  dddiZ2 e3e2      D  ci c]  \  } }|| 
 c}} Z4 G d dejR                        Z5e5D  ci c]  } | | jl                   c} Z7 ejp                  ddd       G d d	             Z9ejp                   G d d             Z: ejp                  ddd       G d d
e9             Z;dedeeee;   ee;   f      fd Z<dedee;   fd!Z=dedeeee;   e;f      fd"Z>d#ee   deed$f   fd%Z? G d& d'      Z@ G d( d)      ZA G d* d+      ZB ejp                          G d, d-             ZC G d. d/      ZD G d0 d1      ZEejp                   G d2 d3             ZFejp                   G d4 d5             ZG G d6 d7      ZH G d8 d9      ZIyc c}} w c c} w ):    N)
AnycastDefaultDictDictIteratorListOptionalSetTupleUnion)Literal)FunctionSchema)_ProfilerResult)
_EventType_ExtraFields_Allocation_ExtraFields_TorchOp_ProfilerEvent_TensorMetadataRecordScope)_element_size)_utilsKey	TensorKeyc                       e Zd Z ej                         Z ej                         Z ej                         Z ej                         Z ej                         Z	 ej                         Z
 ej                         Zy)CategoryN)__name__
__module____qualname__enumautoINPUT	TEMPORARY
ACTIVATIONGRADIENTAUTOGRAD_DETAIL	PARAMETEROPTIMIZER_STATE     g/home/mcse/projects/flask_80/flask-venv/lib/python3.12/site-packages/torch/profiler/_memory_profiler.pyr   r   *   s]    DIIKE		IJtyy{HdiikO		IdiikOr)   r   	darkgreen	goldenrodblackmediumpurplered
mediumblue	royalbluegreyc                       e Zd Z ej                         Z ej                         Z ej                         Z ej                         Zy)ActionN)	r   r   r   r   r    PREEXISTINGCREATEINCREMENT_VERSIONDESTROYr(   r)   r*   r4   r4   B   s7    $))+KTYY[F!		diikGr)   r4   TF)equnsafe_hashfrozenc                   ,    e Zd ZU ej                  ed<   y)r   deviceN)r   r   r   torchr=   __annotations__r(   r)   r*   r   r   L   s    LLr)   c                   N    e Zd ZU dZeed<   eed<   defdZdede	fdZ
defdZy	)
_StoragezBundle storage pointer and id.

    All profiling logic should use `allocation_id`, however it is useful to
    print storage pointers for debugging and unit tests sometimes look up
    values using the storage data pointer of a live Tensor.ptrallocation_idreturnc                 N    t        | j                        dd| j                   dS )Nz>18 ())hexrB   rC   selfs    r*   __repr__z_Storage.__repr__\   s'    dhh-$Bt'9'9&:!<<r)   otherc                 X    t        |t              xr | j                  |j                  k(  S N)
isinstancerA   rC   rJ   rL   s     r*   __eq__z_Storage.__eq___   s%    %*Xt/A/AUEXEX/XXr)   c                 ,    t        | j                        S rN   )hashrC   rI   s    r*   __hash__z_Storage.__hash__b   s    D&&''r)   N)r   r   r   __doc__intr?   strrK   objectboolrQ   rT   r(   r)   r*   rA   rA   Q   sB    ? 
H=# =YF Yt Y(# (r)   rA   c                       e Zd ZU dZeed<   eed<   defdZdd de	fdZ
edee   d	ee   d
ee   dej                  ded    f
d       Zededed    fd       Zedee   ded    fd       Zedeeeeef   fd       Zy)r   a~  Hashable identifier for a storage which has been asigned an ID.

    A detailed description of Tensor IDs and why they are needed is given in
    `torch/csrc/profiler/collection.h` when `TensorID` is declared. To
    summarize, multiple Storage buffers can map to the same logical Tensor.
    This dataclass is used to refer to a concrete in-memory StorageImpl of
    a Tensor.
    idstoragerD   c                 j    d| j                    dt        | j                        dd| j                   dS )Nzid=z: z<24rF   rG   )r[   reprr\   r=   rI   s    r*   rK   zTensorKey.__repr__t   s1    TWWIRT\\ 237r$++aHHr)   rL   c                 4    | j                   |j                   k  S rN   )_as_sortablerP   s     r*   __lt__zTensorKey.__lt__w   s      5#5#555r)   	tensor_idstorage_ptrrC   r=   c                 >    | ||t        || t        ||            S y rN   )r   rA   )rb   rc   rC   r=   s       r*   _makezTensorKey._makez   s/     !')VYm0TUUr)   allocc                 z    | j                  |j                  |j                  |j                  |j                        S rN   )re   r[   rB   rC   r=   )clsrf   s     r*   from_allocationzTensorKey.from_allocation   s)    yy599e.A.A5<<PPr)   tc                     |<| j                  |j                  |j                  |j                  |j                        S y rN   )re   r[   storage_data_ptrrC   r=   )rh   rj   s     r*   from_tensorzTensorKey.from_tensor   s1    =99QTT1#5#5qQQr)   c                     | j                   | j                  j                  | j                  j                  | j                  j
                  fS rN   )r[   r\   rC   r=   typeindexrI   s    r*   r`   zTensorKey._as_sortable   s3    ww22DKK4D4DdkkFWFWWWr)   N)r   r   r   rU   rV   r?   rA   rW   rK   rY   ra   staticmethodr	   r>   r=   re   classmethodr   ri   r   rm   propertyr   r`   r(   r)   r*   r   r   f   s    	GI# I6K 6D 6 C=c]  } 	
 
+	  Q$; Q@U Q Q H_5 (;:O  
 XeCc3$67 X Xr)   noderD   c              #   4  K   | j                   }| j                  d   t        j                  k(  r| j                  d   j                  t
        j                  k(  r| j                  dk(  r|r|d   j                  d   t        j                  k(  r|d   j                  dv r|d   j                  d   j                  rdt        |d   j                  d   j                  d   t              r7d t        j                  |d   j                  d   j                  d         f y | j                  d   t        j                  k(  r| j                  d   }|j                  |j                  J |j                  K|j                  j                   D ]2  \  }}}t        j                  |      t        j                  |      f 4 |j                  L|j                  j                   D ]2  \  }}}t        j                  |      t        j                  |      f 4 y y y w)Nr      ztorch::autograd::AccumulateGrad)zaten::detachz
aten::add_)childrentypedr   TorchOpscoper   BACKWARD_FUNCTIONnameinputsrO   r   r   rm   PyCallmodule	optimizer
parameters)rt   rw   typed_fields_pp_grads         r*   !_extract_parameters_and_gradientsr      s     }}H 	

1+++JJqM;#@#@@II::QKa J$6$66QK >>QKa ''x{((+2215GI))(1+*;*;A*>*E*Ea*HIII 
A*++	+zz!}""*l.D.D.LLL* , 3 3 > > N1f++A.	0E0Ef0MMMN !!- , 6 6 A A N61++A.	0E0Ef0MMMN . 
,s   HHc              #   @   K   t        |       D ]  \  }}|	|  y wrN   r   rt   r   r   s      r*   extract_parametersr      s(     6t< 	6=Gs   c              #   D   K   t        |       D ]  \  }}|	||f  y wrN   r   r   s      r*   extract_gradientsr      s/      7t< 	6V)Os    	 event.c                     g }| rW| j                   d   t        j                  k(  r(|j                  | j                   d   j                         | j
                  } | rWt        |      S Nr   rv   )rx   r   ry   appendrz   parenttuple)r   scopess     r*   
get_scopesr      sT    F
;;q>Z///MM%++a.../  =r)   c                       e Zd ZdZededeee   df   fd       Z	ededee
df   fd       Zedefd       Zededeee
df      fd	       Zy
)SchemaMatchera  Lookup operator schema based on profiled name.

    When profiling we record the operator's name but not the schema. However
    some analysis requires that information. Fortunately we can look up
    registered schema from the recorded name. We do not, however, record the
    overload and so we must compare the profiled arguments with all overloads
    to determine viable matches.

    Note: Once https://github.com/pytorch/pytorch/issues/78871 is completed
    this code will be obsolete.
    rj   rD   .c           
      6   d}| j                  |      D ]_  }|xs |j                  D cg c]  }d c}}t        |j                        D ]'  \  }}||xx   t        |j                  dd      z  cc<   ) a t        |xs d |j                  D              S c c}w )a  Determine which inputs may have mutated based on function schema.

        Note that we don't need to resolve down to a single schema to perform
        this analysis. An input is mutable if it is mutable in any overload. In
        practice, however, it is overwhelmingly common to match a single
        overload. If we cannot find any valid schema then we must be
        conservative and assume all inputs are mutable.
        NFis_writec              3       K   | ]  }d   y wrN   r(   ).0r   s     r*   	<genexpr>z3SchemaMatcher.inputs_are_mutable.<locals>.<genexpr>   s      8! 8s   )match_schemas	arguments	enumerategetattr
alias_infor   r}   )rh   rj   mutableschemar   iargs          r*   inputs_are_mutablez SchemaMatcher.inputs_are_mutable   s     )-''* 	IFB1A1A!BA%!BG#F$4$45 I3
gcnnj%HH
I	I
 W8 8qxx 899	 "Cs   	Bc                      t        d |j                  D              dt        f fdt        fd j                  |j                        xs dD              S )Nc              3      K   | ]^  }t        |t              rt        j                  |      n4t        |t              r#|D cg c]  }t        j                  |       c}n| ` y c c}w wrN   )rO   r   r   rm   listr   r   js      r*   r   z.SchemaMatcher.match_schemas.<locals>.<genexpr>   sb      

  )31o(FI!!!$ ;EQ:MA6q)''*6 


 7s   >A+ A&A+rD   c                     t        | j                        t              k(  xr( t        fdt        | j                        D              S )Nc              3   \   K   | ]#  \  }}j                  ||j                         % y wrN   )_types_matchro   )r   observed
schema_argrh   s      r*   r   z?SchemaMatcher.match_schemas.<locals>.matches.<locals>.<genexpr>  s0      C(Hj   :??;Cs   ),)lenr   allzip)r   rh   	signatures    r*   matchesz,SchemaMatcher.match_schemas.<locals>.matches  sH    v''(C	N: s C,/	6;K;K,LC @ r)   c              3   4   K   | ]  } |      s|  y wrN   r(   )r   sr   s     r*   r   z.SchemaMatcher.match_schemas.<locals>.<genexpr>  s     O1GAJQOs   r(   )r   r}   rY   lookup_schemasr|   )rh   rj   r   r   s   ` @@r*   r   zSchemaMatcher.match_schemas   sS     

 XX

 

		t 	 O 2 2166 : @bOOOr)   c                    t        |t        j                  j                        r(|j	                         }|d u xs | j                  ||      S t        |t        j                  j                        ry|j                  t        j                  j                  j                               r$t        |t              xr t        d |D              S t        j                  j                  t        ft        j                  j                  t        d       ft        j                  j                   t"        ft        j                  j$                  t&        ft        j                  j(                  t*        ft        j                  j,                  t.        ft        j                  j0                  t"        t&        t*        t.        fff}|D ]  \  }}t        ||      st        ||      c S  |d u S )NTc              3   <   K   | ]  }t        |t                y wrN   )rO   r   r   r   s     r*   r   z-SchemaMatcher._types_match.<locals>.<genexpr>  s      6-.
1i(6s   )rO   r>   _COptionalTypegetElementTyper   AnyTypeisSubtypeOfListType	ofTensorsr   r   
TensorTyper   NoneTypero   BoolTyperY   IntTyperV   	FloatTypefloatComplexTypecomplex
NumberType)rh   r   schema_typetype_mapjit_typepy_typess         r*   r   zSchemaMatcher._types_match  sp   k588#8#89%446Kt#Ns'7'7+'NNk588#3#34""588#4#4#>#>#@Ah- # 62:6 3 
 XX  ),XXT
+XX%XXs#XX'XX!!7+XX  4eW"=>K
 #+ 	6Hh+x0!(H55	6 4r)   r|   c                 |    	 d| vry t        t        j                  j                  |             S # t        $ r Y y w xY w)Nz::)r   r>   r   _jit_get_schemas_for_operatorRuntimeError)r|   s    r*   r   zSchemaMatcher.lookup_schemas0  s@    	 4??EFF 		s   / '/ 	;;N)r   r   r   rU   rr   r   r   r	   rY   r   r   r   r   rq   rW   r   r(   r)   r*   r   r      s    
 :#7 :E(4.RUBU<V : :" P2 Pu^S=P7Q P P*  D    B S XeNC4G.H%I  r)   r   c                   N    e Zd ZdeddfdZdee   fdZede	edf   fd       Z
y)OpTreeresultrD   Nc                     |j                         | _        t        t        | j	                         d             | _        y )Nc                     | j                   S rN   start_time_nsxs    r*   <lambda>z!OpTree.__init__.<locals>.<lambda>I  s
    AOO r)   key)experimental_event_tree_root_nodesr   sorteddfs_sorted_nodesrJ   r   s     r*   __init__zOpTree.__init__G  s.    !99;"6$((*:S#TUr)   c              /   f   K   t        j                  | j                  g|i |E d {    y 7 wrN   )r   traverse_dfsr   rJ   argskwargss      r*   r   z
OpTree.dfsK  s*     &&t'7'7I$I&IIIs   '1/1.c                     | j                   S rN   )r   rI   s    r*   sorted_nodeszOpTree.sorted_nodesN  s    !!!r)   )r   r   r   r   r   r   r   r   rs   r   r   r(   r)   r*   r   r   F  sP    V V4 VJh~&> J "eNC$78 " "r)   r   c                   ^    e Zd ZdeddfdZdee   ddfdZede	de
e   fd       Zd	efd
Zy)SizeMapop_treerD   Nc                 b   i | _         |j                  D ]X  }|j                  d   t        j                  k(  r5| j                  |j                  d         D ]  }| j                  |        Y|j                  d   t        j                  k(  sz|j                  d   }|j                  |j                  J |j                  A|j                  j                  D ](  \  }}}| j                  |       | j                  |       * |j                  |j                  j                  D ]C  \  }}}| j                  |       | j                  |       |D ]  \  }}| j                  |        E [ i }	|j                  D ]  }|j                  d   t        j                  k(  s$|j                  d   }
t        j                  |
      }|sKt        |
j                        }|	j!                  ||      }||k7  sx| d| }t"        j%                  d|        | j                   j'                  |	       y )Nr   rv   z vs. z(Mismatch between allocation and free: %s)_valuesr   rx   r   ry   _flat_tensor_inputs_update_valuesr~   r   r   r   
Allocationr   ri   abs
alloc_size
setdefaultlogwarningupdate)rJ   r   rt   rj   r   r   r   r   stateallocationsalloc_fieldsr   new_size
prior_sizedeltas                  r*   r   zSizeMap.__init__T  s   -/(( 	3Dzz!}
 2 2211$**Q-@ +A''*+ A*"3"33#zz!}#**2l6L6L6TTT&&2(4(;(;(F(F 41f++A.++F34  ))5,8,B,B,M,M 3(65++A.++F3$) 3DAq //233	3( -/(( 	WDzz!}
 5 55#zz!}//="<#:#:;H!,!7!7X!FJ "X-#-,eH: >$NPUV!	W$ 	K(r)   rj   c                    t         j                  |      }|||j                  t        j                  k(  rt        d t        |j                  xs dg|j                  xs dg      D              }|t        |j                        z  }|dk\  sJ |        t        | j                  j                  |d      |      | j                  |<   y y y y )Nc              3   2   K   | ]  }|d    |d   z    ywr   rv   Nr(   r   s     r*   r   z)SizeMap._update_values.<locals>.<genexpr>  s     OAAaD1Q4KOs   rv   r   )r   rm   layoutr>   stridedmaxr   sizesstridesr   dtyper   get)rJ   rj   r   n	num_bytess        r*   r   zSizeMap._update_values  s    ##A&?q}U]]1JOQWW^QYY=M1#)NOOAM!''22I>1i[1> #DLL$4$4S!$<i HDLL 2K}?r)   opc              #      K   | j                   D ]2  }t        |t              r| t        |t              s)|E d {    4 y 7 wrN   )r}   rO   r   r   )r  r   s     r*   r   zSizeMap._flat_tensor_inputs  s>      	A!_-At$		 s   5AAAAr   c                      | j                   |   S rN   )r   rJ   r   s     r*   __getitem__zSizeMap.__getitem__  s    ||C  r)   )r   r   r   r   r   r	   r   r   rq   r   r   r   r   r  r(   r)   r*   r   r   S  se    *) *)4 *)XI 9 Id I  4 /9R  !y !r)   r   c                   b    e Zd ZU dZee   ed<   dZee   ed<   e	defd       Z
e	defd       Zy)DataFlowEdgeNinput_versionFmutatedrD   c                     | j                   d u S rN   )r  rI   s    r*   is_allocationzDataFlowEdge.is_allocation  s    !!T))r)   c                     | j                   d u S rN   )r  rI   s    r*   is_deletionzDataFlowEdge.is_deletion  s    ||t##r)   )r   r   r   r  r	   rV   r?   r  rY   rs   r  r  r(   r)   r*   r  r    sR    #'M8C='#GXd^#*t * * $T $ $r)   r  c                       e Zd ZdeddddfdZdeeef   fdZe	deee
eef   f   fd       Ze	deeef   fd	       Ze	de
ed
f   fd       Ze	defd       Zy)DataFlowNoder   graphDataFlowGraphrD   Nc           	          || _         || _        | j                         | _        | j                  j	                         D ]:  \  }}|j
                  s|j                  r | j                  j                  |       < | j                  j	                         D ci c]#  \  }}||| j                  j                  |      f% }}}t        d |j                         D              sJ | d| j                          y c c}}w )Nc              3   ,   K   | ]  \  }}||k(    y wrN   r(   r   s      r*   r   z(DataFlowNode.__init__.<locals>.<genexpr>  s     8da168s   z, )_event_graph_determine_edges_edgesitemsr  r  bumpoutputslookupr   values)rJ   r   r  r   edgekvversionss           r*   r   zDataFlowNode.__init__  s    595J5J5L**, 	&IC||D$6$6  %	&
 ?Cll>P>P>RSdaA4;;--a011SS8hoo&788VXJb:VV8 Ts   (C:c                    t        t        j                  | j                  g            }i }d |D        D ]  }t	        |j
                  t        j                  |            D ]  \  }}t        |t              r?t        j                  |      }|j                  |t                     j                  |       Ut        |t              sf|D ]@  }t        j                  |      }|j                  |t                     j                  |       B   t!        j"                  t$              }|j'                         D ]M  \  }}	|	|r| j(                  j+                  |      nd||   _        d|	v xs t        |	      dk(  }
|
||   _        O |D ]  }|j0                  d   t2        j4                  k(  s$|j0                  d   j6                  dk  sAt        j9                  |j0                  d         }||   }||j.                  
J d|        d |_        |r| j(                  j+                  |      nd|_         |D ]j  }|j0                  d   t2        j4                  k(  s$|j0                  d   j6                  dkD  sAd |t        j9                  |j0                  d            _        l t;        t=        d |j'                         D                    S )	Nc              3   |   K   | ]4  }|j                   d    t        j                  k(  s$|j                   d    6 ywr  )rx   r   ry   r   s     r*   r   z0DataFlowNode._determine_edges.<locals>.<genexpr>  s-     S!!''!*
@R@R2R1771:Ss   %<<TrN   r   rv   zDouble delete: c              3   0   K   | ]  \  }}|	||f  y wrN   r(   r   r)  r*  s      r*   r   z0DataFlowNode._determine_edges.<locals>.<genexpr>  s     Mdaq}Aq6Ms   
	)r   r   r   r  r   r}   r   r   rO   r   r   rm   r   setaddr   collectionsdefaultdictr  r#  r   r&  r  r  rx   r   r   r   ri   dictr   )rJ   subtreemutable_by_keyr  op_inputr   r   
op_input_iedgesmutable_setr  r   r(  s                r*   r!  zDataFlowNode._determine_edges  s   ++T[[M:; JLSwS 	KB%(		=;;B?& K!' h8#//9C"--c359==gF  $/&. K
'33J?&11#su=AA'JKK	K  ''5 . 4 4 6 	-CFI4;;+=+=c+Brc
(
  ;.RE+4F'4Q%,c
"	-  	LAwwqzZ222qwwqz7L7Lq7P//
;Sz{dll&>W/RUQV@WW>#@CT[[%7%7%<"	L  	RAwwqzZ222qwwqz7L7Lq7PMQi//
;<J	R
 FMekkmMMNNr)   c           
          | j                   j                         D ci c]A  \  }}|j                  s0|t        |j                        t        t        |j                        fC c}}S c c}}w rN   )r"  r#  r  rY   r  r   rV   r  rJ   r)  r*  s      r*   r}   zDataFlowNode.inputs  s[     ))+	
 1?? QYYc1??!;<<
 	
 
s   AA(c                     | j                   j                         D ci c]G  \  }}|j                  r|j                  r|j                  r||j
                  dn|j
                  dz   I c}}S c c}}w r   )r"  r#  r  r  r  r  r=  s      r*   r%  zDataFlowNode.outputs  sa     ))+
1!)) AOO+q11DD
 	
 
s   AA..c                 V    t        d | j                  j                         D              S )Nc              3   Z   K   | ]#  \  }}|j                   s|j                  s | % y wrN   )r  r  r0  s      r*   r   z-DataFlowNode.intermediates.<locals>.<genexpr>  s%      
!QQ]]A
s   +++)r   r"  r#  rI   s    r*   intermediateszDataFlowNode.intermediates  s)     
++++-
 
 	
r)   c                 .    | j                   j                  S rN   )r  r   rI   s    r*   
start_timezDataFlowNode.start_time  s    {{(((r)   )r   r   r   r   r   r   r   r  r!  rs   r   rY   rV   r}   r%  rA  rC  r(   r)   r*   r  r    s    Wn W_ W W1O$y,'>"? 1Of 
YdCi(889 
 
 
in- 
 
 
uY^4 
 

 )C ) )r)   r  c                       e Zd ZdeddfdZedeedf   fd       Zd Z	edee
df   fd       Zededee
df   fd	       Zd
edefdZd
eddfdZd
eddfdZy)r  r   rD   Nc                    || _         | j                  |      | _        i | _        | j                  D cg c]  }t        ||        c}| _        | j                  j                  d        | j                          y c c}w )Nc                     | j                   S rN   )rC  r   s    r*   r   z(DataFlowGraph.__init__.<locals>.<lambda>  s
    ALL r)   r   )	_op_tree_extract_leaf_events_leaf_events_active_versionleaf_eventsr  _flow_nodessortvalidate)rJ   r   es      r*   r   zDataFlowGraph.__init__  sk     55g>?A;?;K;KLaLD1L"89 Ms   A<.c                 ,    t        | j                        S rN   )r   rL  rI   s    r*   
flow_nodeszDataFlowGraph.flow_nodes  s    T%%&&r)   c                 0   t               }| j                  D ]^  }t        |j                  j                               }||z  }|r-J |j                  j
                   d|j                   d|        ||z  }` i }| j                  D ]  }|j                  j                         D ](  \  }\  }}|j                  |d      }	|	|k(  r!J |	|f        |j                  j                         D ]*  \  }}|j                  ||      }
||
k\  s	J ||
f       |||<   ,  y )N r   )	r1  rQ  r%  r#  r  r|   r"  r}   r	  )rJ   r%  rt   node_outputs
duplicatestensor_versionsr   r   versionexpectedprior_versions              r*   rN  zDataFlowGraph.validate  s5   .1eOO 	$Dt||1134L </J!Sdkk&6&6%7qQzl#SS>|#G		$ 13OO 	/D%)[[%6%6%8 @!\a*..sA67*?Xw,??*@ !% 2 2 4 /W / 3 3C A-/I'=1II/'.$/	/r)   c                     | j                   S rN   )rI  rI   s    r*   rK  zDataFlowGraph.leaf_events$  s       r)   c                     g dt         dt        fddt         ffd}| j                  |      D ]  } t        t	        d             S )a  Partially traverse the op tree and extract top level ops.

        Consider the following code:
        ```
        with record_function("My annotation"):
            x.zero_()
            y.zero_()
        ```

        The op tree (assuming no Autograd) will look like:
          <Python context>
            TorchOp: "My annotation"
              TorchOp: zero_
                TorchOp: fill_
              TorchOp: zero_
                TorchOp: fill_

        The recursive structure of operator calls makes data flow unwieldy.
        In order to simplify analysis we would like to select the highest level
        ops to represent in the graph. In this case those are the `zero_` ops;
        the fact that `fill_` is called is an implementation detail. We also
        do not want to group everything under "My annotation" as this could
        create overly coarse bundles and lose critical semantics.

        To address this issue we walk over the graph and select the topmost
        torch ops ** which match at least one operator schema **. These form
        the leaves of the first pass through the op tree. (As well as any
        allocations or frees which do are not part of a kernel.) These events
        form the logical nodes in our data flow graph.
        rO  rD   c                     | j                   d   t        j                  k(  xrW | j                   d   j                  t        j
                  k(  xs+ t        t        j                  | j                   d               S r   )	rx   r   ry   rz   r   r{   rY   r   r   )rO  s    r*   leaf_opz3DataFlowGraph._extract_leaf_events.<locals>.leaf_opK  s_    771:!3!33 
  K$A$AA A33AGGAJ?@r)   c                      |       s| j                   t        j                  k(  rj                  |        g S | j                  S rN   )tagr   r   r   rw   )rO  rK  r]  s    r*   children_fnz7DataFlowGraph._extract_leaf_events.<locals>.children_fnQ  s8    qzQUUj&;&;;""1%	::r)   )r`  c                     | j                   S rN   r   r   s    r*   r   z4DataFlowGraph._extract_leaf_events.<locals>.<lambda>[  s
    q r)   r   )r   rY   r   r   r   )r   r`  r   rK  r]  s      @@r*   rH  z"DataFlowGraph._extract_leaf_events(  s^    B -/	~ 	$ 		> 	 5 	A	 VK-FGHHr)   r   c                 F    | j                   j                  |d      }|J |S Nr   rJ  r   )rJ   r   rW  s      r*   r&  zDataFlowGraph.lookup]  s+    &&11#q9"""r)   c                 h    | j                   j                  |d       }|J |dz   | j                   |<   y )Nrv   )rJ  r	  )rJ   r   rY  s      r*   r$  zDataFlowGraph.bumpb  s;    ,,00d;((($1A$5S!r)   c                 ^    | j                   j                  |d      J d | j                   |<   y rc  rd  r  s     r*   deletezDataFlowGraph.deleteg  s1    ##..sA6BBB$(S!r)   )r   r   r   r   r   rs   r   r  rQ  rN  r   rK  rq   rH  r   rV   r&  r$  rg  r(   r)   r*   r  r    s     4  'E,"34 ' '/* !U>3#67 ! ! 2If 2I~s7J1K 2I 2Ih)  
6	 6d 6
)) ) )r)   r  c                       e Zd ZU dZee   ed<    ej                  e	      Z
eeef   ed<    ej                  e	      Zeeef   ed<    ej                  e      Zee   ed<   y)CategoryElementNby_iddefault_factoryby_key
by_version_by_id_keyset)r   r   r   rj  r	   r   r?   dataclassesfieldr5  rm  r   r   rn  TensorAndIDr1  ro  r
   r(   r)   r*   ri  ri  l  sq     $E8H$(9(9(9$(OFDH$%O.?k.?.?PT.UJ[(*+U %6K$5$5c$JM3y>Jr)   ri  c                       e Zd ZU  ej                  d       Zeeef   e	d<   de
deddfdZde
deddfd	Zde
d
ededdfdZde
d
ededdfdZded
edee   fdZy)CategoryDictc                  4    t        j                  t              S rN   )r3  r4  ri  r(   r)   r*   r   zCategoryDict.<lambda>z  s     7 7 H r)   rk  r   r   categoryrD   Nc                     || j                   |j                     _        | j                   |j                     j                  j	                  |       y rN   )r   r[   rj  ro  r2  rJ   r   rv  s      r*   	set_by_idzCategoryDict.set_by_id}  s8    %-SVV"SVV**..s3r)   c                 P    || j                   |j                     j                  |<   y rN   )r   r[   rm  rx  s      r*   
set_by_keyzCategoryDict.set_by_key  s    +3SVV##C(r)   rW  c                 T    || j                   |j                     j                  ||f<   y rN   )r   r[   rn  rJ   r   rW  rv  s       r*   set_by_versionzCategoryDict.set_by_version  s#    :BSVV''g7r)   c                 n    | j                   |j                     j                  j                  ||f|       y rN   )r   r[   rn  r   r}  s       r*   setdefault_by_versionz"CategoryDict.setdefault_by_version  s+     	SVV''22C>8Lr)   c                 
   t        |t              rt        |t              sy | j                  |j                     }|j
                  xs< |j                  j                  |d       xs |j                  j                  ||fd       S rN   )	rO   r   r   r   r[   rj  rm  r	  rn  )rJ   r   rW  elements       r*   r	  zCategoryDict.get  sn    c3
3	(B,,svv&MM <~~!!#t,<!!%%sGnd;	
r)   )r   r   r   rp  rq  r   r   rV   ri  r?   r   r   ry  r{  r~  r  r   r	   r	  r(   r)   r*   rt  rt  w  s    1B1B1BH2G[o-. 4Y 4( 4t 44i 48 4 4C) Cc CX CRV CMM'*M6>M	M

s 
S 
Xh-? 
r)   rt  c                       e Zd ZdeddfdZedeeeee	ef   df   fd       Z
defdZdeeee   f   fdZdee   fd	Zdd
ZddZddZddZddZddZd Zy)MemoryProfiler   rD   Nc                    t        |      | _        t        | j                        | _        t	        | j                        | _        t               | _        | j                          | j                          | j                          | j                          | j                          | j                          | j                          y rN   )r   rG  r  _data_flow_graphr   	_size_maprt  _categories_set_gradients_and_temporaries#_set_parameters_using_python_tracer_set_inputs_set_parameters_using_data_flow_set_activations_set_optimizer_state_set_autograd_detailr   s     r*   r   zMemoryProfile.__init__  s    v -dmm < /'>++-002,,.!!#!!#r)   .c           	      ^    g }i }i } j                   j                         D ]S  }|j                  d   t        j                  k(  s%|j                  d   }|j
                  }|dkD  }|j                  }t        j                  |      }	|	|||	|f<   pt        |j                        }
|j                  |
j                  f}|rU||v r&|j                  |t        j                  |
df|f       d||<   |j                  |t        j                  |
df|f       |j                  |t        j                   |
df| f       |j#                  |d      r.|j                  dt        j$                  |
df| f       V  j'                         }t)        t+        |j-                                     }|j-                         D 
cg c]%  \  }
}|
df|vr|dk(  rdt        j$                  |
|ff' }}
} j.                  j0                  D ]  }|j2                  j5                         D ]  \  }
}|j6                  r,||
df   }|j                  |t        j                  |
dff       nV|j8                  rJ|j:                  j                  }|j<                  }|J |j                  |t        j                  |
|ff       |j>                  s||
df   }|j                  |t        j                   |
||
   ff         |jA                   fd|D               |jC                  d        tE        |      S c c}}
w )	Nr   rv   TFr.  c              3   V   K   | ]   \  }}\  }}||||fj                   |   f " y wrN   )r  )r   timeactionr   rW  rJ   s        r*   r   z)MemoryProfile.timeline.<locals>.<genexpr>  s9      
,fnsG 6C>4>>#+>?
s   &)c                 *    | d   | d   j                   fS r   )valuer   s    r*   r   z(MemoryProfile.timeline.<locals>.<lambda>  s    1Q41"4 r)   r   )#rG  r   rx   r   r   r   r   r   ri   r   r=   rB   r   r4   r7   r6   r8   popr5   _category_snapshotr5  r   keysr  rQ  r"  r#  r  r  r  r  r  extendrM  r   )rJ   outputallocation_timeslive_unknownr   r   r   r  rj   tkeyr   ptr_and_devicesnapshotlast_versionrW  eventsrt   r(  s   `                 r*   timelinezMemoryProfile.timeline  s	   :<>@FH]]&&( 	E{{1~!6!66${{1~)44
 *Q'' 00>#>?$dM%:; l112C&2&6&6

%CN$)\9"MM!"F$<$<sAh
 S <@L8"MM1fmmc1Xz*RSq&..3(ZK&PQ+//F"MM!#V%7%7#qJ; O3	: **,F8==?34 !)9
WT{"22w!| ##c7^49
 9
 ))44 	QD![[..0 Q	T%%(#t5AMM1fmmc1X">?\\11A"00G"...MM1f&>&>g"OP##(#u6AMM1fnnsL<M6N"OPQ	Q  	 
06
 	

 	45V}99
s   >*L)c                 \     | j                   j                  |i |t        j                  k(  S rN   )r  r	  r   r$   r   s      r*   _is_gradientzMemoryProfile._is_gradient  s+    #t##T4V48I8IIIr)   c           	      Z   t               }| j                  j                  D ]}  }|j                  d |j                  j                         D               |j                  d |j                  D               |j                  |j                  j                                 | j                  j                  j                         D ]$  }|j                  d |j                  D               & t        |      D ci c]$  \  }}||f| j                  j                  ||      & c}}S c c}}w )Nc              3   0   K   | ]  \  }\  }}||f  y wrN   r(   )r   r)  r   r*  s       r*   r   z3MemoryProfile._category_snapshot.<locals>.<genexpr>  s     'T91fq!A'T   c              3   $   K   | ]  }|d f 
 ywr   Nr(   r   r   s     r*   r   z3MemoryProfile._category_snapshot.<locals>.<genexpr>  s     &NCQx&N   c              3   $   K   | ]  }|d f 
 ywr  r(   r  s     r*   r   z3MemoryProfile._category_snapshot.<locals>.<genexpr>  s     &KCQx&Kr  )r1  r  rQ  r   r}   r#  rA  r%  r  r   r'  ro  r   r	  )rJ   all_tensor_versionsrt   r   r   rW  s         r*   r  z MemoryProfile._category_snapshot  s   03))44 	=D&&'T@Q@Q@S'TU&&&N4;M;M&NN&&t||'9'9';<	=
 !!))002 	LA&&&K1??&KK	L
 !'': ;
W 'ND,,00g>>
 	
 
s   :)D'c                 P    t               	 t              } j                  j                  D ]e  }t	         fd|j
                  j                         D              }|s3j                  |       j                  d |j                  D               g t              |k(  rS )a\  Extract IDs of Tensors which depend or will depend on a gradient.

        Note that this weakened definition of "depends" requires us to loop
        over the data flow graph multiple times because it allows dependency
        information to flow backward through edges and removes the guarantee
        that nodes are topologically sorted. (Or indeed, even that a valid
        topological order exists.) Put another way, we have converted an
        acyclic data flow graph into a cyclic graph and we are attempting to
        partition cycles involving a gradient from the rest of the graph.
        c              3      K   | ]`  \  }\  }}j                   j                  ||      t        j                  t        j                  fv s|j
                  v r|j
                   b y wrN   )r  r	  r   r$   r&   r[   )r   r   r   rW  depends_on_gradientrJ   s       r*   r   zAMemoryProfile._any_version_depends_on_gradient.<locals>.<genexpr>  sa      )\a''++C9 ))8+=+=>?vv!44	 FFs   A&A)c              3   4   K   | ]  }|j                     y wrN   )r[   r  s     r*   r   zAMemoryProfile._any_version_depends_on_gradient.<locals>.<genexpr>  s     .N#svv.Ns   )	r1  r   r  rQ  r   r}   r#  r   r%  )rJ   
start_sizert   idsr  s   `   @r*    _any_version_depends_on_gradientz.MemoryProfile._any_version_depends_on_gradient  s     ),01J--88 O -1[[->->-@  '..s3'...N.NNO$ &':5**+ r)   c                 h   | j                   j                         D ]?  }t        |      D ]/  \  }}| j                  j	                  |t
        j                         1 A | j                  j                  D ]=  }|j                  D ],  }| j                  j                  |t
        j                         . ? y)z>Mark Tensors which are unambiguous and simple to reason about.N)rG  r   r   r  ry  r   r$   r  rQ  rA  r{  r"   )rJ   r   r   r   rt   r   s         r*   r  z,MemoryProfile._set_gradients_and_temporaries  s     ]]&&( 	FE.u5 F	6  **683D3DEF	F ))44 	CD'' C  ++Ax/A/ABC	Cr)   c                     | j                   j                         D ]?  }t        |      D ]/  }|| j                  j	                  |t
        j                         1 A y rN   )rG  r   r   r  ry  r   r&   )rJ   r   r   s      r*   r  z1MemoryProfile._set_parameters_using_python_tracer0  sW    ]]&&( 	FE'. F=$$..q(2D2DEF	Fr)   c                      j                         }t               t         j                  j                        D ]m  }|j
                  j                         D ch c]  \  }\  }}||f }}}}||j                  j                         z  }t         fd|D              si|z  o j                         } j                  j                  D ]N  }t        j                  t        |j                        v s)|t        |j                  j                               z  }P |D ]?  \  }}|j                  |vs j                  j!                  ||t"        j$                         A yc c}}}w )a  Mark inputs based on which Tensors are updated using gradients.

        The process for differentiating between inputs and activations is more
        involved. Most Tensors in a training loop depend on at least one
        gradient: parameters depend on them through updates, and activations
        and optimizer state depend on them transitively through parameters.
        Critically, we do not need to know which Tensors are parameters to
        apply this method; we can simply walk the data flow graph to build the
        set of all values which depend on a gradient and then obtain the set
        of inputs from the conjugate set.

        There is, however, one hiccup. The first time we see a parameter is
        generally on the forward pass of the first step. We know from
        inspection of the data flow graph that v1 of that Tensor depends on
        a gradient (provided we profile an optimizer step), but not v0. To
        address this problem we weaken the definition of "depends on a
        gradient" to "any version of this Tensor depends on a gradient",
        which in turn strengthens the criteria for the input set enough to
        filter the activations in the forward pass of the first step.c              3      K   | ]C  } j                   j                  | t        j                  t        j                  fv xs |v  E y wrN   )r  r	  r   r$   r&   )r   r   produces_gradientrJ   s     r*   r   z,MemoryProfile._set_inputs.<locals>.<genexpr>W  sW        %  $$a(X->->@R@R,SS *))*s   A	AN)r  r1  reversedr  rQ  r}   r#  r%  anycopyr   r{   r   r  r[   r  r  r   r!   )	rJ   r  rt   r   r   rW  tensorsinput_candidatesr  s	   `       @r*   r  zMemoryProfile._set_inputs6  sO   2 #CCE /2eT22==> 	-D?C{{?P?P?RSS*;#|7W~SGSt||))++G  ! 
 "W,!	- -113))44 	>D,,
4;;0GG C(:(:(<$== 	> - 	ULCvv00  66sGX^^T	U! Ts   E(
c                      j                         }t               }|j                         D ch c]  \  }}|t        j                  k(  s| }}} j
                  j                  D ]  }|j                  j                         D 	ch c]  \  }\  }}	||	f }
}}}	t        j                  t        |j                        vs\t         fd|
D              rqt         fd|j                  j                         D              r|j                  |
      s||j                  j                         z  }||
j                  |      z  } t               t!         j
                  j                        D ]`  }t         fd|j                  j                         D              s1j#                  d |j                  j                         D               b |j%                         |D ch c]  \  }}|j&                   }}}| j)                         z  }|j+                         D ]>  \  }}|j&                  |v s j,                  j/                  |t        j0                         @ yc c}}w c c}	}}w c c}}w )a  Deduce which Tensors are parameters.

        Consider the following code for the step of SGD with momentum
        (nesterov=False), where `d_p` is the gradient of `param` and `buf` is
        the momentum buffer.
        ```
          buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
          d_p = buf
          param.add_(d_p, alpha=-lr)
        ```
        Both `param` and `buf` take a gradient and perform an in-place update.

        The python tracer will inspect calls to `nn.Module.forward` and
        `optim.Optimizer.step` to extract parameter and optimizer state
        respectively (including parameters), so this is generally a non-issue.

        However as a fallback we can also exploit several properties of
        parameters to distinguish them from other model state.

        First, they are directly used in the forward pass. (At this point we
        haven't established which parts of the graph correspond to the forward
        pass but we can deduce enough to suffice.) Some mutable state such as
        batch norm moving averages also contribute to the forward pass, but
        optimizer state does not.

        Second, a parameter is by definition used to compute at least one
        gradient and depends on at least one gradient.
        c              3   <   K   | ]  } j                   |   y wrN   r  r   r   rJ   s     r*   r   z@MemoryProfile._set_parameters_using_data_flow.<locals>.<genexpr>  s     Ba-D--q1B   c              3   <   K   | ]  } j                   |   y wrN   r  r  s     r*   r   z@MemoryProfile._set_parameters_using_data_flow.<locals>.<genexpr>  s     Pa-D--q1Pr  c              3   H   K   | ]  } j                   | xs |v   y wrN   r  )r   r   rJ   used_for_gradients     r*   r   z@MemoryProfile._set_parameters_using_data_flow.<locals>.<genexpr>  s4       "!!1%?.?)??s   "c              3   0   K   | ]  \  }\  }}||f  y wrN   r(   )r   r   r   rW  s       r*   r   z@MemoryProfile._set_parameters_using_data_flow.<locals>.<genexpr>  s!      )'8sLQS'N)r  N)r  r1  r#  r   r!   r  rQ  r}   r   r{   r   r  r  r%  intersection
differencer  r   intersection_updater[   r  r  r  ry  r&   )rJ   r  candidate_parametersr   rv  candidate_fwd_tensorsrt   r   r   r  r}   parameter_keysr  s   `           @r*   r  z-MemoryProfile._set_parameters_using_data_flowi  s*   : **, 25!)!13
!XX5OA3
 3
 ))44 	QD:>++:K:K:MNNsJQsElNFN --Z5LLB6BBP4<<;M;M;OPP *66v>%););)==%$(9(9:O(PP$	Q /2eT22==> 	D ++-  "(( )<@KK<M<M<O) 	 	001BC 0DDVS!#&&DD$??AAmmo 	DFCvv'  **30B0BC	DE3

 O4 Es   I,I,
I2
.I9c                    t         j                  t         j                  h}t         j                  t         j                  h}| j
                  j                  D ]  }|j                  j                         D ch c]  \  }\  }}||f }}}}|D ch c]  } | j                  j                  |  }	}|	|z  sb|	||z  z
  rkt        j                  t        |j                        vs|j                  j                         D ].  } | j                  j                   g |t         j                    0  yc c}}}w c c}w )z(Flood the graph to identify activations.N)r   r!   r#   r&   r"   r  rQ  r}   r#  r  r	  r   r{   r   r  r%  r  )
rJ   requiredalso_allowedrt   r   r   r  r}   r   input_categoriess
             r*   r  zMemoryProfile._set_activations  s    NNH$7$78 **H,>,>?))44 	TD:>++:K:K:MNNsJQsElNFNBHIQ 4 0 0 4 4a 8II "H,)X-DE  11DKK9PP++- TA:D$$::SASx?R?RST	TNIs   8D6
 D=c           
         | j                   j                         D ]  }|j                  d   t        j                  k(  s$|j                  d   j
                  s>|j                  d   j
                  j                  }t        j                  |D cg c]  \  }}}|
 c}} D ]G  \  }}t        j                  |      }|| j                  j                  |t        j                         I  y c c}}w r   )rG  r   rx   r   r~   r   r   itchainr   rm   r  ry  r   r'   )rJ   r   r   r   r   rj   r   s          r*   r  z"MemoryProfile._set_optimizer_state  s    ]]&&( 	RE{{1~!2!22u{{1~7O7O"[[^55@@
HH
&KAuu&KL RDAq#//2C((2238P8PQR	R 'Ls   C/c                    d t         j                  h}| j                  j                  D ]  }t        j
                  t        |j                        v s)|j                  j                         D ]W  \  }}|dk(  s"| j                  j                  ||dz
        |v s-| j                  j                  ||t         j                         Y  y r   )r   r%   r  rQ  r   r{   r   r  r%  r#  r  r	  r  )rJ   priorrt   r   rW  s        r*   r  z"MemoryProfile._set_autograd_detail  s    x//0))44 	D,,
4;;0GG$(LL$6$6$8 LC!|t'7'7';';C1'MQV'V((>>(*B*B	r)   rD   N)r   r   r   r   r   rs   r   rV   r4   KeyAndIDr  rY   r  r   rr  r	   r   r  r
   r  r  r  r  r  r  r  r  r(   r)   r*   r  r    s    $ $4 $ @%c68S&@ A3 FG @ @DJt J
Dhx6H)H$I 
 !+#c( !+FC"F1UfHDTT&Rr)   r  c                   6    e Zd Zd Zd ZddZddZ	 d	 ddZy)	MemoryProfileTimelinec                 H    |j                   | _         |j                  | _        y)a  The minimum representation of the memory profile timeline
        includes the memory timeline and categories. The timeline
        consists of [timestamp, action, (TensorKey, version), numbytes]
        elements, to denote any actions (pre-existing, create, destroy,
        or increment_version) that occurred to a specific Tensor for a
        chunk of memory. The categories help map each (TensorKey,
        version) pair into a category.N)r  r  
categories)rJ   memory_profiles     r*   r   zMemoryProfileTimeline.__init__  s     '//(44r)   c                 >    t        j                  |      }g }g  fd}d} j                  D ]G  \  }}\  }}	}
|j                  |k7  r|dk7  rt        |dz        }|dk(  s
||k  r|dkD  r|}t	        |      dk(  r9|j                  |       j                  dgt        D cg c]  }d c}z          n;||d   k7  r3|j                  |       j                  d   j                                |t        j                  t        j                  fv r |||	|
       |t        j                  k(  r |||	|
         |||	dz   |
       |t        j                  k(  r |||	|
        <t        d|        |D cg c]  }|dk  r|n| }}|fS c c}w c c}w )zConvert the memory timeline and categories into a memory plot
        consisting of timestamps and their respective sizes by category
        for a given device.

        Input: device
        Output: [timestamps, sizes by category]
        c                     t        | t              rj                  j                  | |      nd }t        |   dz   }d   |xx   t        |      z  cc<   y )Nrv   r.  )rO   r   r  r	  _CATEGORY_TO_INDEXrV   )r   rW  r   rv  rp   rJ   r  s        r*   r   z8MemoryProfileTimeline._coalesce_timeline.<locals>.update  sV     c9- ##C1 
 'x014E"IeE
*r)   r.  i  r   rv   Unknown action: )r>   r=   r  rV   r   r   r  r  r4   r5   r6   r7   r8   
ValueError)rJ   
device_strr=   timesr   t_minrj   r  r   rW  numbytesr   r  s   `           @r*   _coalesce_timelinez(MemoryProfileTimeline._coalesce_timeline  s    j)!#	+ 37== !	>/Av~WxzzV# BwDM {q5yQU 5zQQaS/A#B!A#BBCeBiQU2Y^^-. &,,fmm<<sGX.6333sGhY/sGaK26>>)sGhY/ !#3F8!<==C!	>F 1661!a%Q&66e|+ $C( 7s   $	F?FNc                     | j                  |      \  }}ddl}t        |d      5 }|j                  ||g|       ddd       y# 1 sw Y   yxY w)zSaves the memory timeline as [times, sizes by category]
        as a JSON formatted file to the given path for the given
        device.r   Nw)r  jsonopendump)rJ   pathr  r  r  r  fs          r*   export_memory_timelinez,MemoryProfileTimeline.export_memory_timeline"  sM     ..z:u$_ 	)IIuena(	) 	) 	)s   AAc                     t        j                  |      }g } fd} j                  D ]  \  }}\  }}	}
|j                  |k7  r|t        j                  t        j
                  fv r$|j                  |t        |   |
 |||	      f       b|t        j                  k(  rK|j                  |t        |   |
  |||	      f       |j                  |t        |   |
 |||	dz         f       |t        j                  k(  r%|j                  |t        |   |
  |||	      f       t        d|        ddl}t        |d      5 }|j                  ||       ddd       y# 1 sw Y   yxY w)zSaves the memory timeline as raw memory event tuples in the
        form of (timestamp, action, numbytes, category)
        as a JSON formatted file to the given path for the given
        device.c                 r    t        | t              rj                  j                  | |      nd }t        |   S rN   )rO   r   r  r	  r  )r   rW  rv  rJ   s      r*   get_category_indexzLMemoryProfileTimeline.export_memory_timeline_raw.<locals>.get_category_index5  s:     c9- ##C1 
 &h//r)   rv   r  r   Nr  )r>   r=   r  r4   r5   r6   r   _ACTION_TO_INDEXr7   r8   r  r  r  r  )rJ   r  r  r=   
raw_eventsr  rj   r  r   rW  r  r  r  s   `            r*   export_memory_timeline_rawz0MemoryProfileTimeline.export_memory_timeline_raw-  s   
 j)68
	0 48== +	>/Av~WxzzV#&,,fmm<<!!(0 *38	 6333!!(0!	*38	 !!(0 *3!<	 6>>)!!(0!	*38	 !#3F8!<==W+	>Z 	$_ 	%IIj!$	% 	% 	%s   EE&c           	         ddl }|j                  j                  d      }|t        d       yddlm} ddlm} ddlm	}	 ddl
m}
 ddl}| j                  |      }|j                  |d         |j                  |d         }}t        |      }||z  }|j!                  |d	      d
z  }t#        j$                  |      }t"        j&                  j)                  |      }t"        j&                  j+                  |      }|
j-                  |d      }|j/                         }t0        j3                         D ]8  \  }}t4        |   }|j7                  |dz  |dd|f   |dd|dz   f   |d       : |j9                  t0        D cg c]  }|dn|j:                   c}       |j=                  d       |j?                  d       djA                  |r|gng d|d
z  dd|d
z  ddgz         }|jC                  |        |	ddd      }|jE                          |jG                  |j:                  d       tI        |j:                  d      5 } ||jK                               jM                  d      }d | d!}tI        |d"      5 }|jO                  |       ddd       ddd        ||j:                         yc c}w # 1 sw Y   )xY w# 1 sw Y   -xY w)#zsExports the memory timeline as an HTML file which contains
        the memory timeline plot embedded as a PNG file.r   N
matplotlibzDexport_memory_timeline_html failed because matplotlib was not found.)	b64encode)remove)NamedTemporaryFilerv   )axisi   @P   )figsizedpig     @@gffffff?)coloralphaUnknownz	Time (ms)zMemory (GB)z

zMax memory allocated: z.2fz GiB 
Max memory reserved: z GiBwbz.pngF)suffixrg  png)formatrbzutf-8z}<html>
<head><meta charset="utf-8" /><title>GPU Memory Timeline HTML</title></head>
<body>
  <img src='data:image/png;base64,z'>
</body>
</html>r  )(importlib.utilutil	find_specprintbase64r  osr  tempfiler  matplotlib.pyplotpyplotnumpyr  arraymincumsumr>   r=   cudamax_memory_allocatedmax_memory_reservedfiguregca_CATEGORY_TO_COLORSr#  r  fill_betweenlegendr|   
set_xlabel
set_ylabeljoin	set_titleclosesavefigr  readdecodewrite)rJ   r  r  r  title	importlibmatplotlib_specr  r  r  pltnpmtr  r  r  stackedr=   r  r  figaxesrv  r  r   tmpfiletmpencodedhtmlr  s                                 r*   export_memory_timeline_htmlz1MemoryProfileTimeline.export_memory_timeline_htmlo  s    	#..22<@"V $/'$$Z0xx1AuE
))E)*W4j)$zz>>vF#jj<<VD jjbj1wwy288: 	OHe"8,AWQT]GAq1uH,=URU  	
 	

@ST1I6TU$&eW2()=w)G(L M((;W(Ec'J$P
 	u %T&GGLL/',,% 
	
+227;G# $+) ,D dC A
	 	w||; U6 
	 
	s*   J6	8KJ;K;K	 KKr  ))      N)r   r   r   r   r  r  r  r2  r(   r)   r*   r  r    s-    	5:x	)@%F 9=E	Er)   r  )Jr3  rp  r   	itertoolsr  loggingtypingr   r   r   r   r   r   r	   r
   r   r   typing_extensionsr   r>   torch._Cr   torch._C._autogradr   torch._C._profilerr   r   r   r   r   r   torch._utilsr   torch.profilerr   rV   r  rr  	getLoggerr   r   Enumr   r&   r'   r!   r"   r#   r$   r%   r  r   r  r4   r  r  	dataclassr   rA   r   r   r   r   r   r   r   r   r  r  r  ri  rt  r  r  )r   cs   00r*   <module>rB     s          &  # .  ' ! K$%g!"tyy " kNNG|k&	  (11D'EFtq!adF TYY  )//1AqwwJ/  $E$?  @ ( ( (( $D>.X .X ?.Xb'N
'NeHY'))<<=>'NT^ 0C 
eHY'234h~. 5c9I3J j jZ
" 
"@! @!F 
$ 
$ 
$[) [)|g) g)T K K K 
 
 
>@ @F
Z Zw G 0s   7I1!I7