
    ¯wg`                        d dl Z d dlZd dlZd dlZd dlmZmZmZmZ d dl	Z	d dl
mc mZ d dlmZmZmZmZmZ d dlmZ d dlmZmZmZ  G d d      Z G d d	e      Z G d
 de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z  G d de      Z! G d de      Z"dee   fdZ#defdZ$defdZ%	 	 	 d"de&de&d ee'   fd!Z(y)#    N)DictListOptionalSet)
_EventType_ExtraFields_PyCall_ExtraFields_PyCCall_ExtraFields_TorchOp_ProfilerEvent)profile)index_of_first_matchtraverse_bfstraverse_dfsc                       e Zd ZdZddedefdZed        Zde	fdZ
d Zd	ee	   fd
Zd	ee	   fdZde	fdZd Zde	fdZde	fdZde	fdZde	fdZde	fdZy)Patternz
    Base class for all patterns, subclass this class and implement match()
    to define custom patterns.

    In subclass, define description and skip property.
    profshould_benchmarkc                 ~   || _         || _        d| _        d| _        d| _        |j
                  |j
                  j                  J |j
                  j                  j                         | _        i | _	        | j                  D ]7  }| j                  j                  |j                  g       j                  |       9 y )Nz!Please specify a name for patternz(Please specify a description for pattern )r   r   namedescriptionurlprofilerkineto_resultsexperimental_event_tree
event_treetid_root
setdefault	start_tidappend)selfr   r   events       d/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/torch/profiler/_pattern_matcher.py__init__zPattern.__init__   s    	 07	E}}(T]]-I-I-UUU--66NNP9;__ 	HEMM$$U__b9@@G	H    c                      yNF r!   s    r#   skipzPattern.skip)   s    r%   r"   c                 :    | j                    dt        |       }|S )Nz
[Source Code Location] )r   source_code_location)r!   r"   msgs      r#   reportzPattern.report-   s+      9:Nu:U9VW 	 
r%   c              #   J   K   t        | j                        E d{    y7 w)z
        Traverse the event tree and yield all events.
        Override this method in subclass to customize the traversal.
        N)r   r   r)   s    r#   eventTreeTraversalzPattern.eventTreeTraversal3   s     
  000   #!#eventsc                     | j                    dt        |       d}| j                  rt        | d      r| j	                  |      S |S |S )N: z events matched.	benchmark)r   lenr   hasattrbenchmark_summary)r!   r2   default_summarys      r#   summaryzPattern.summary:   sY    !YYKr#f+6FG   4- &&v. %
 r%   c           
         dt         fd}t        | d      sJ d       | j                  |      t        d |D              }t        fd|D              }| j                   dt        |       d |||z
         d	t        ||z  d
       dS )Ntime_nsc                 L    g d}|D ]  }| dk  r
| dd| c S | dz  }  | ddS )N)nsusmsi  z.2f z sr(   )r<   unit_lstunits      r#   format_timez.Pattern.benchmark_summary.<locals>.format_timeF   sK    )H  !T>%c]!D622D ! c]"%%r%   r5   zPlease implement benchmark()c              3   4   K   | ]  }|j                     y wN)duration_time_ns).0r"   s     r#   	<genexpr>z,Pattern.benchmark_summary.<locals>.<genexpr>P   s     GuE22Gs   c              3   T   K   | ]  }t        |         |j                  z   ! y wrF   )input_shapesrG   )rH   r"   shapes_factor_maps     r#   rI   z,Pattern.benchmark_summary.<locals>.<genexpr>Q   s.      
 l512U5K5KK
s   %(r4   z* events matched. Total Estimated Speedup: z (   zX))intr7   r5   sumr   r6   round)r!   r2   rD   original_timenew_timerL   s        @r#   r8   zPattern.benchmark_summaryE   s    	& 	& t[)I+II) NN62GGG 

 

 yykCK= )((3MH4L(M'NbQVWdemWmopQqPrrtv	
r%   c                     t         )zt
        Return True if the event matches the pattern.
        This method should be overriden in subclass.
        )NotImplementedErrorr!   r"   s     r#   matchzPattern.matchZ   s
    
 "!r%   c                     | j                   rg S g }| j                         D ]%  }| j                  |      s|j                  |       ' |S rF   )r*   r0   rV   r    )r!   matched_eventsr"   s      r#   rX   zPattern.matched_eventsa   sM    99I,,. 	-Ezz% %%e,	- r%   c                 P    |j                   r|j                   }|j                   r|S rF   parentrU   s     r#   root_ofzPattern.root_ofj   s     llLLE llr%   c                     |j                   r|j                   j                  }n| j                  |j                     }|j	                  |      }|d | ||dz   d  fS )N   )r[   childrenr   r   index)r!   r"   r_   r`   s       r#   siblings_ofzPattern.siblings_ofo   sU    <<||,,H}}U__5Hu%%!)+!666r%   c                 <    | j                  |      \  }}|r|d   S d S )Nr   ra   )r!   r"   _next_eventss       r#   next_ofzPattern.next_ofw   s'    ))%0;!,{1~6$6r%   c                 <    | j                  |      \  }}|r|d   S d S )Nrc   )r!   r"   prev_eventsrd   s       r#   prev_ofzPattern.prev_of{   s'    ))%0Q"-{2747r%   c                 v    |sy |j                   r) ||      s!|j                   }|j                   r	 ||      s!|S rF   rZ   )r!   r"   	predicates      r#   go_up_untilzPattern.go_up_until   s4    ll9U#3LLE ll9U#3r%   NF)__name__
__module____qualname____doc__r   boolr$   propertyr*   r   r.   r0   r   r:   r8   rV   rX   r\   ra   rf   rj   rm   r(   r%   r#   r   r      s    
HW 
H 
H  N 1	d>2 	
^(< 
*"> "^ 
7 77^ 78^ 8 r%   r   c                   :     e Zd Zddededef fdZdefdZ xZ	S )NamePatternr   r   r   c                 H    t         |   ||       d| | _        || _        y )NzMatched Name Event: )superr$   r   r   )r!   r   r   r   	__class__s       r#   r$   zNamePattern.__init__   s)    /01$8	r%   r"   c                 Z    t        j                  | j                  |j                        d uS rF   )researchr   rU   s     r#   rV   zNamePattern.match   s     yyEJJ/t;;r%   rn   )
ro   rp   rq   r   strrs   r$   r   rV   __classcell__ry   s   @r#   rv   rv      s)    W C 4 
<> <r%   rv   c                   V     e Zd ZdZd	dedef fdZed        Zd Z	de
e   fdZ xZS )
ExtraCUDACopyPatternas  
    This pattern identifies if we creates a constant tensor on CPU and immediately moves it to GPU.
    example: torch.zeros((100, 100)).to("cuda")

    Pattern:
    build-in method                 |build-in method
        ...                         |    aten::to
            aten::fill_/aten::zero_ |        aten::_to_copy

    Algorithm:
    We start at node aten::to, go parent events' previous events,
    and check if we have a aten::fill_/aten::zero_ as we keep going down the tree.
    We always select the last child in the children list when we go down the tree.
    If at any step we failed, it is not a match.
    r   r   c                 b    t         |   ||       d| _        d| _        d| _        h d| _        y )NzExtra CUDA Copy PatternzQFilled a CPU tensor and immediately moved it to GPU. Please initialize it on GPU.zlhttps://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#create-tensors-directly-on-the-target-device>   aten::fill_aten::normal_aten::uniform_aten::zero_)rx   r$   r   r   r   init_opsr!   r   r   ry   s      r#   r$   zExtraCUDACopyPattern.__init__   s5    /0-	n B
r%   c                 b    | j                   j                   xs | j                   j                   S rF   r   
with_stackrecord_shapesr)   s    r#   r*   zExtraCUDACopyPattern.skip   '    99'''Ftyy/F/F+FFr%   c                 (   |j                   dk7  ry|}|j                  sy|j                  d   }|j                   dk7  ry|j                  sy|j                  d   }|j                   dk7  ryt        |      }t        |      dk  ry|d   |d   |d   k7  ry|}|j                  }|y| j                  |      }|y|j                  r5|j                  d   }|j                   | j                  v ry	|j                  r5|j                   | j                  v S )
Nzaten::toFrh   zaten::_to_copyzaten::copy_rM   r   r^   T)r   r_   input_dtypesr6   r[   rj   r   )r!   r"   to_eventdtypess       r#   rV   zExtraCUDACopyPattern.match   s   ::#~~r"::))~~r"::&e$v;?!9q	VAY 6=U#=nnNN2&EzzT]]*	 nn
 zzT]]**r%   r2   c                 :   |D ci c]  }t        |      d }}|D ]w  }|d   }t        j                  dd|i      }t        j                  dd|i      }|j                  d      j                  }|j                  d      j                  }	|	|z  ||<   y |S c c}w )N        r   ztorch.ones(size).to("cuda")sizestmtglobalsztorch.ones(size, device="cuda")
   )rK   r5   Timertimeitmean)
r!   r2   r"   rL   shaper   to_timerde_timerto_timede_times
             r#   r5   zExtraCUDACopyPattern.benchmark   s    CIJ%\%0#5JJ& 
	9E8D 2VTNH !6H oob)..Goob)..G'.'8e$
	9 !  Ks   Brn   )ro   rp   rq   rr   r   rs   r$   rt   r*   rV   r   r   r5   r~   r   s   @r#   r   r      sH     

W 

 

 G G#+L!^ 4 !r%   r   c                   @     e Zd ZdZddedef fdZd ZdefdZ	 xZ
S )	ForLoopIndexingPatterna  
    This pattern identifies if we use a for loop to index a tensor that
    can be vectorized.
    example:
    tensor = torch.empty((100, 100))
    for i in range(100):
        tensor[i] = i

    Pattern:
    aten::select | ... | aten::select | ... (Repeat)

    Algorithm:
    We start at node aten::select, and we check if we can find this alternating patterns.
    We also keep a dictionary to avoid duplicate match in the for loop.
    r   r   c                 `    t         |   ||       d| _        d| _        t	               | _        y )NzFor Loop Indexing Patternz6For loop indexing detected. Vectorization recommended.)rx   r$   r   r   setvisitedr   s      r#   r$   zForLoopIndexingPattern.__init__   s+    /0/	S!$r%   c              #   J   K   t        | j                        E d{    y7 w)zN
        We need to use BFS traversal order to avoid duplicate match.
        N)r   r   r)   s    r#   r0   z)ForLoopIndexingPattern.eventTreeTraversal  s       000r1   r"   c           
         |j                   dk7  ry|j                  | j                  v ryd}| j                  |      \  }}t	        |      dk  ryd }t        |d       }|y|g|d | z   }|t	        |      dz
  d  }t        dt	        |      t	        |            D ]M  } |||||t	        |      z          r.|dz  }| j                  j                  ||   j                         I |dk\  S  |dk\  S )Naten::selectFr^   c                     t        |       t        |      k7  ryt        | |      D ]   \  }}|j                  |j                  k7  s  y y)NFT)r6   zipr   )list1list2op1op2s       r#   same_opsz.ForLoopIndexingPattern.match.<locals>.same_ops  sG    5zSZ'u- !S88sxx' ! r%   c                      | j                   dk(  S )Nr   )r   es    r#   <lambda>z.ForLoopIndexingPattern.match.<locals>.<lambda>  s    qvv?W r%   r   r   )r   idr   ra   r6   r   rangeadd)	r!   r"   repeat_countrd   nextr   next_select_idxindexing_opsis	            r#   rV   zForLoopIndexingPattern.match  s   ::'88t||#""5)4t9>	 /t5WX"w&6!77C%)+,q#d)S%67 	Ad1q3|3D/D&EF!  a,r!!	 r!!r%   rn   )ro   rp   rq   rr   r   rs   r$   r0   r   rV   r~   r   s   @r#   r   r      s,     'W ' '1"> "r%   r   c                   h     e Zd Zd
dedef fdZe fd       ZdefdZ	defdZ
dee   fd	Z xZS )FP32MatMulPatternr   r   c                 P    t         |   ||       d| _        d| _        d| _        y )NzFP32 MatMul Patternz|You are currently using GPU that supports TF32. Please enable TF32 by setting 'torch.backends.cuda.matmul.allow_tf32 = True'zUhttps://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devicesrx   r$   r   r   r   r   s      r#   r$   zFP32MatMulPattern.__init__+  s2    /0)	[ 	 kr%   c                     t         j                  j                  d}n.t        d t         j                  j                         D              }|du xs# t        |   xs | j                  j                   S )NFc              3   >   K   | ]  }t        |d d       dk\    yw)   NP   )rN   )rH   archs     r#   rI   z)FP32MatMulPattern.skip.<locals>.<genexpr>:  s     V43tABx=B.Vs   )
torchversionhipallcudaget_arch_listrx   r*   r   r   )r!   has_tf32ry   s     r#   r*   zFP32MatMulPattern.skip4  s]    ==(H V5::;S;S;UVVH5 OEGLO		8O8O4OOr%   r"   c                     |j                   t        j                  k7  ryt        |j                  t
              sJ |j                  dk(  r|j                  j                  du ryy)NFaten::mmT)tagr   TorchOp
isinstanceextra_fieldsr
   r   allow_tf32_cublasrU   s     r#   rV   zFP32MatMulPattern.match=  sT    99
***%,,.BCCC::#!!33u<r%   c                     | j                   S rF   )r   rU   s     r#   r.   zFP32MatMulPattern.reportG  s    r%   r2   c                 ,   |D ci c]  }t        |      d }}|D ]  }t        j                  |d   dt        j                        }t        j                  |d   dt        j                        }t	        j
                  d||d      }t	        j
                  dd	||d
      }dt        j                  j                  j                  _	        |j                  d      j                  }	|j                  d      j                  }
|
|	z  ||<    |S c c}w )Nr   r   r   devicedtyper^   torch.mm(matrixA, matrixB)matrixAmatrixBr   z,torch.backends.cuda.matmul.allow_tf32 = True)r   setupr   Fr   )rK   r   randnfloat32r5   r   backendsr   matmul
allow_tf32r   r   )r!   r2   r"   rL   r   r   r   
fp32_timer
tf32_timer	fp32_time	tf32_times              r#   r5   zFP32MatMulPattern.benchmarkJ  s    CIJ%\%0#5JJ& 	=Ekk%(6OGkk%(6OG"1$+@J #1D$+@J
 5:ENN&&1"))"-22I"))"-22I'09'<e$	=  ! # Ks   Drn   )ro   rp   rq   r   rs   r$   rt   r*   r   rV   r.   r   r5   r~   r   s   @r#   r   r   *  sX    kW k k P P>  N  !^ 4 !r%   r   c                   :     e Zd ZdZddedef fdZdefdZ xZ	S )OptimizerSingleTensorPatterna{  
    This pattern identifies if we are using the single-tensor version of an optimizer.
    example:
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    By adding foreach=True to enable multi-tensor optimizer, we can gain speedup when
    the kernels are relatively small.

    Pattern:
    XXXXX: _single_tenser_<OPTIMIZER_NAME>

    Algorithm:
    String match
    r   r   c                 b    t         |   ||       d| _        g d| _        d| _        d| _        y )NzOptimizer Single Tensor Pattern)adamsgdadamwzDeteced optimizer running with single tensor implementation. Please enable multi tensor implementation by passing 'foreach=True' into optimizer.r   )rx   r$   r   optimizers_with_foreachr   r   r   s      r#   r$   z%OptimizerSingleTensorPattern.__init__n  s9    /05	'?$b 	 r%   r"   c                 f    | j                   D ]"  }|j                  j                  d|       s" y y)N_single_tensor_TF)r   r   endswith)r!   r"   	optimizers      r#   rV   z"OptimizerSingleTensorPattern.matchx  s8    55 	Izz""_YK#@A	 r%   rn   
ro   rp   rq   rr   r   rs   r$   r   rV   r~   r   s   @r#   r   r   _  s'    W  > r%   r   c                   :     e Zd ZdZddedef fdZdefdZ xZ	S )SynchronizedDataLoaderPatterna  
    This pattern identifies if we are using num_workers=0 in DataLoader.
    example:
    torch.utils.data.DataLoader(dataset, batch_size=batch_size)
    Add num_workers=N to the arguments. N depends on system configuration.

    Pattern:
    dataloader.py(...): __iter__
        dataloader.py(...): _get_iterator
            NOT dataloader.py(...): check_worker_number_rationality

    Algorithm:
    If we don't see check_worker_number_rationality call in the dataloader __iter__,
    It is not an asynchronous dataloader.

    r   r   c                 P    t         |   ||       d| _        d| _        d| _        y )NzSynchronized DataLoader PatternzDetected DataLoader running with synchronized implementation. Please enable asynchronous dataloading by setting num_workers > 0 when initializing DataLoader.zjhttps://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#enable-async-data-loading-and-augmentationr   r   s      r#   r$   z&SynchronizedDataLoaderPattern.__init__  s4    /05	n 	
: 	r%   r"   c                 B   dt         dt         fd}	 |j                    ||j                  d      sy|j                  sy|j                  d   } ||j                  d      sy|j                  sy|j                  d   } ||j                  d       S # t        $ r Y yw xY w)	Nr   function_namec                     | j                  t        j                  j                  dddd            xr | j	                  |      S )Nr   utilsdatazdataloader.py)
startswithospathjoinr   )r   r   s     r#   is_dataloader_functionzCSynchronizedDataLoaderPattern.match.<locals>.is_dataloader_function  s:    ??WgvG /--./r%   F__iter__r   _get_iteratorcheck_worker_number_rationality)r}   r   UnicodeDecodeErrorr_   )r!   r"   r   s      r#   rV   z#SynchronizedDataLoaderPattern.match  s    	/ 	/S 	/	JJ &ejj*=~~q!%ejj/B~~q!)%**6WXXX " 		s   B 	BBrn   r   r   s   @r#   r   r     s)    "

W 

 

Y> Yr%   r   c                   :     e Zd ZdZddedef fdZdefdZ xZ	S )GradNotSetToNonePatterna  
    This pattern identifies if we are not setting grad to None in zero_grad.
    example:
    optimizer.zero_grad()
    By setting set_to_none=True, we can gain speedup

    Pattern:
    XXXXX: _zero_grad
        NOT aten::zeros
            aten::zero_

    aten::zero_ is called on each parameter in the model.
    We also want to make sure it is not called by aten::zeros.

    Algorithm:
    String match
    r   r   c                 P    t         |   ||       d| _        d| _        d| _        y )Nz,Gradient Set To Zero Instead of None PatternzfDetected gradient set to zero instead of None. Please add 'set_to_none=True' when calling zero_grad().zxhttps://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#disable-gradient-calculation-for-validation-or-inferencer   r   s      r#   r$   z GradNotSetToNonePattern.__init__  s5    /0B	F 	
H 	r%   r"   c                     |j                   j                  d      sy|j                  syt        |j                        D ]-  }|j                   dk(  s|j                  j                   dk7  s- y y)Nz: zero_gradFr   zaten::zerosT)r   r   r_   r   r[   )r!   r"   	sub_events      r#   rV   zGradNotSetToNonePattern.match  sa    zz""=1~~%enn5 	I-/$$))]:	 r%   rn   r   r   s   @r#   r  r    s'    $

W 

 

> r%   r  c                   N     e Zd ZdZddedef fdZe fd       Zde	fdZ
 xZS )	&Conv2dBiasFollowedByBatchNorm2dPatternau  
    This pattern identifies if we are enabling bias in Conv2d which is followed by BatchNorm2d.
    Bias doesn't do anything when followed by batchnorm.
    Pattern:
    nn.Module: Conv2d            | nn.Module: BatchNorm2d
        ...
            aten::conv2d AND dtype of third argument is not null
    The third argument is the bias
    Algorithm:
    String match
    r   r   c                 P    t         |   ||       d| _        d| _        d| _        y )Nz5Enabling Bias in Conv2d Followed By BatchNorm PatternzcDetected bias enabled in Conv2d that is followed by BatchNorm2d. Please set 'bias=False' in Conv2d.zhttps://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#disable-bias-for-convolutions-directly-followed-by-a-batch-normr   r   s      r#   r$   z/Conv2dBiasFollowedByBatchNorm2dPattern.__init__  s2    /0K	 AO 	r%   c                 L    | j                   j                  du xs
 t        |   S r'   )r   r   rx   r*   )r!   ry   s    r#   r*   z+Conv2dBiasFollowedByBatchNorm2dPattern.skip  s!    yy&&%/?57<?r%   r"   c                     |j                   dk7  ryt        t        |            dk  st        |      d   y| j                  |d       }|sy| j	                  |      }|sy|j                   j                  d      S )Nzaten::conv2dFr   rM   c                 8    | j                   j                  d      S )Nznn.Module: Conv2d)r   r   r   s    r#   r   z>Conv2dBiasFollowedByBatchNorm2dPattern.match.<locals>.<lambda>	  s    QVV../BC r%   znn.Module: BatchNorm2d)r   r6   r   rm   rf   r   rU   s     r#   rV   z,Conv2dBiasFollowedByBatchNorm2dPattern.match  s}    ::'|E"#a'<+>q+A+I  C
 U#zz$$%=>>r%   rn   )ro   rp   rq   rr   r   rs   r$   rt   r*   r   rV   r~   r   s   @r#   r	  r	    s=    

W 
 
 @ @?> ?r%   r	  c                   X     e Zd Zd	dedef fdZed        ZdefdZ	de
e   fdZ xZS )
MatMulDimInFP16Patternr   r   c                 P    t         |   ||       d| _        d| _        d| _        y )Nz3Matrix Multiplication Dimension Not Aligned PatternzUDetected matmul with dimension not aligned. Please use matmul with aligned dimension.z[https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#use-mixed-precision-and-ampr   r   s      r#   r$   zMatMulDimInFP16Pattern.__init__  s)    /0I	rpr%   c                 b    | j                   j                   xs | j                   j                   S rF   r   r)   s    r#   r*   zMatMulDimInFP16Pattern.skip  r   r%   r"   c                     d }|j                   dvryt        |      syt        |      d   }|t        j                  t        j                  fv r |t        |      d      syy)Nc                 ,    t        fd| D              S )Nc              3   B   K   | ]  }|d d D ]  }|z  dk(     yw)Nr   r(   )rH   r   dimmultiples      r#   rI   zCMatMulDimInFP16Pattern.match.<locals>.mutiple_of.<locals>.<genexpr>   s/     Uu%PRPS*U3sX~*U*Us   )r   )shapesr  s    `r#   
mutiple_ofz0MatMulDimInFP16Pattern.match.<locals>.mutiple_of  s    UUUUr%   )r   z	aten::bmmzaten::addmmFr      T)r   r   r   bfloat16halfrK   )r!   r"   r  	arg_dtypes       r#   rV   zMatMulDimInFP16Pattern.match  sb    	V ::EEE" '*	44Z>
 r%   r2   c                    d }|D ci c]  }t        |      d }}|D ]'  }t        j                  |d   dt        j                        }t        j                  |d   dt        j                        }t	        j
                  d||d	      }t        j                   ||d   d
      dt        j                        }t        j                   ||d   d
      dt        j                        }t	        j
                  d||d	      }	|j                  d      j                  }
|	j                  d      j                  }||
z  ||<   * |S c c}w )Nc                 ^    | D cg c]  }|t        j                  ||z        z   c}S c c}w rF   )mathceil)r  r  r   s      r#   closest_multiplez:MatMulDimInFP16Pattern.benchmark.<locals>.closest_multiple.  s)    HNOuHtyy)9::OOOs   "*r   r   r   r   r^   r   r   r   r  r   )rK   r   r   float16r5   r   r   r   )r!   r2   r"  r"   rL   r   r   r   not_aligned_dim_timeraligned_dim_timernot_aligned_dim_timealigned_dim_times               r#   r5   z MatMulDimInFP16Pattern.benchmark-  s@   	P DJJ%\%0#5JJ& 	OEkk%(6OGkk%(6OG$-OO1$+@%! kk q1-fEMMG kk q1-fEMMG !*1$+@! $9#?#?#C#H#H 077;@@'7:N'Ne$'	O( ! + Ks   Ern   )ro   rp   rq   r   rs   r$   rt   r*   r   rV   r   r5   r~   r   s   @r#   r  r    sL    qW q q G G> !^ 4 !r%   r  r"   c                    | r| j                   t        j                  k(  s| j                   t        j                  k(  rt	        | j
                  t        t        f      sJ | j
                  j                  j                  j                  dt        j                  z         sC| j
                  j                  j                   d| j
                  j                  j                   S | j                  } | ry)Nr   :zNo source code location found)r   r   PyCallPyCCallr   r   r   r	   caller	file_namer   r   sepline_numberr[   r"   s    r#   r,   r,   I  s    
99
)))UYY*:L:L-L""%8:N$O   %%,,66AA'BFFBRS,,33==>a@R@R@Y@Y@e@e?fgg  +r%   c                     t        | j                  t              sJ t        d | j                  j                  D              S )Nc              3   H   K   | ]  }t        t        |d d              yw)sizesr(   N)tuplegetattrrH   r   s     r#   rI   zinput_shapes.<locals>.<genexpr>W  s     SAwq'2./Ss    "r   r   r
   r4  inputsr0  s    r#   rK   rK   U  s6    e((*>???S9K9K9R9RSSSr%   c                     t        | j                  t              sJ t        d | j                  j                  D              S )Nc              3   6   K   | ]  }t        |d d        yw)r   N)r5  r6  s     r#   rI   zinput_dtypes.<locals>.<genexpr>\  s     NqGT*Ns   r7  r0  s    r#   r   r   Z  s6    e((*>???NE4F4F4M4MNNNr%   r   print_enablejson_report_dirc           
         i }t        | |      t        | |      t        | |      t        | |      t	        | |      t        | |      t        | |      g}t               }g }d dd g}|j                  d       |D ]  }	|	j                         }
|
s|j                  |	j                  |
             |
D ]  }|	j                  |      }||vs|j                  |       |j                  |       t        |      j                  d      \  }}|j                  |g       j                  t!        |      |	j"                  |	j$                  |	j&                  d         |t(        j*                  j-                  |d      }t(        j*                  j/                  |      r<t1        |      5 }t3        j4                  |      }|j7                  |       |}d d d        t1        |d      5 }t3        j8                  ||d	       d d d        |j                  d
       ||z  }|j                  d dd        |rt;        dj-                  |             y y # 1 sw Y   xY w# 1 sw Y   _xY w)Nz(----------------------------------------zTorchTidy ReportzMatched Events:r)  )r/  r   r   messageztorchtidy_report.jsonw   )indentzSummary:
)r   r   r   r   r  r	  r  r   r    rX   r:   r.   r   r,   splitr   rN   r   r   r   r   r   r   existsopenjsonloadupdatedumpprint)r   r   r;  r<  report_dictanti_patternsreported	summariesmessage_listanti_patternrX   r"   
report_msgsrc_locationline_nojson_report_pathfexisiting_reports                     r#   report_all_anti_patternsrW  _  sX    KT#34$ 01$T+;<%d,<=&67.t5EFt%56	M uHIh.vh78L)*% %446--n=># 	E%,,U3J)##J/Z((<U(C(I(I#(N%g&&|R8??'*7| , 1 1+//#/#;#;		( "77<<9PQ77>>*+&' /1#'99Q<  ''4./ "C( 	0AIIk1Q/	0 
#IL6("26(;<dii%& / /	0 	0s   &)I#III#)FTN))rF  r   r   r{   typingr   r   r   r   r   torch.utils.benchmarkr   r5   torch._C._profilerr   r   r	   r
   r   torch.profilerr   torch.profiler._utilsr   r   r   r   rv   r   r   r   r   r   r  r	  r  r,   rK   r   rs   r}   rW  r(   r%   r#   <module>r]     s     	 	 , ,  ) )  # R Ro oj<' <T!7 T!n<"W <"~2! 2!j7 @6YG 6Yt,g ,^(?W (?V3!W 3!l	+ 8 	+T T
O O #%)	8'8' 8' c]	8'r%   