
    ɯwg2                       U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZmZmZ d dlmZ d dlmZ erd dlmZ d ad ad ag ad	ed
<   g Zded<   d ad aej                   G d d             Zg aded<   d a d a!d a"d Z#e G d d             Z$d Z% G d d      Z&i Z'ded<   e G d d             Z(e(jS                  dg d       e(jS                  dg d       e(jS                  dg d        e(jS                  d!g d"       e(jS                  d#g d$       d% Z*d& Z+d' Z,d( Z-d) Z.d* Z/d+ Z0d, Z1d- Z2d. Z3d/ Z4e	d3d0       Z5d1 Z6d2 Z7y)4    )annotationsN)	dataclass)	lru_cache)DictListSetTupleTYPE_CHECKING)config)get_benchmark_name)BaseSchedulerNodez#List[Tuple[BaseSchedulerNode, int]]nodes_num_elemz%List[Tuple[BaseSchedulerNode, float]]node_runtimesc                  &    e Zd ZU ded<   dZded<   y)CppOuterLoopFusedCountintinner_kernel_numberr   local_buffer_numberN)__name__
__module____qualname____annotations__r        \/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/torch/_inductor/metrics.pyr   r   (   s      r   r   zList[CppOuterLoopFusedCount]!cpp_outer_loop_fused_inner_countsc                     da dadat        j	                          t
        j	                          dadat        j	                          da	da
day )Nr   )generated_kernel_countgenerated_cpp_vec_kernel_countnum_bytes_accessedr   clearr   ir_nodes_pre_fusioncpp_to_dtype_countr   num_comprehensive_padding)num_matches_for_scatter_upon_const_tensornum_loop_reorderingr   r   r   resetr'   8   sX     %&"%++- !01-r   c                  N    e Zd ZU dZded<   ded<   ded<   ded<   ded<   ded<   y	)
CachedMetricsDeltasz]
    The subset of metrics we want update across cache hits, e.g., the
    FxGraphCache.
    r   r   r   r"   r#   r    r%   N)r   r   r   __doc__r   r   r   r   r)   r)   P   s-    
  $''/22r   r)   c                 n    t        j                  t              D  cg c]  } | j                   c} S c c} w N)dataclassesfieldsr)   name)fields    r   get_metric_fieldsr1   _   s&    $/$6$67J$KL5EJJLLLs   2c                  2    e Zd ZdZddZddZedd       Zy)	CachedMetricsHelperz
    A helper class to help calculate and apply counter deltas for those
    metrics we want to save with cache entries (e.g., FxGraphCache) and
    apply on a cache hit.
    c                d    i | _         t               D ]  }t               |   | j                   |<    y r,   )cached_metricsr1   globals)selfmetrics     r   __init__zCachedMetricsHelper.__init__j   s3     ') 	<F*1)F*;D'	<r   c                z    i }t               D ]"  }t               |   | j                  |   z
  ||<   $ t        di |S )Nr   )r1   r6   r5   r)   )r7   delta_metricsr8   s      r   
get_deltaszCachedMetricsHelper.get_deltaso   sL    ') 	TF$+If$58K8KF8S$SM&!	T #3]33r   c                `    t               D ]!  }t               |xx   t        | |      z  cc<   # y r,   )r1   r6   getattr)deltar8   s     r   apply_deltasz CachedMetricsHelper.apply_deltasv   s-    ') 	8FIf!77	8r   N)returnNone)rA   r)   )r?   r)   )r   r   r   r*   r9   r<   staticmethodr@   r   r   r   r3   r3   c   s%    <
4 8 8r   r3   zDict[str, MetricTable]REGISTERED_METRIC_TABLESc                  X    e Zd ZU ded<   ded<   dZded<   d Zd	 Zd
 Zd Ze	d        Z
y)MetricTablestr
table_namez	List[str]column_namesr   r   num_rows_addedc                   | j                   t               vry  |       }t        | j                        t        |      k(  s(J t        | j                         dt        |              t	        | j                        t	        |j                               k(  s6J t	        | j                         dt	        |j                                       t               g}|| j                  D cg c]  }||   	 c}z  }| j                  |       y c c}w )Nz v.s. )rH   enabled_metric_tableslenrI   setkeysr   
_write_row)r7   row_fnrow_dictrowcolumn_names        r   add_rowzMetricTable.add_row   s   ??"7"9984$$%*
 
 	<$##$%VCM?;	< 
 4$$%MMO*
 
 	C$##$%VC,@+AB	C 

  
 	9J9JK+%KK Ls   'D
c                "    d| j                    dS )Nmetric_table_z.csv)rH   )r7   s    r   output_filenamezMetricTable.output_filename   s    t/t44r   c                    | j                         }t        |d      5 }t        j                  |d      }|j	                  dg| j
                  z          d d d        y # 1 sw Y   y xY w)Nw
lineterminator
model_name)rX   opencsvwriterwriterowrI   )r7   filenamefdra   s       r   write_headerzMetricTable.write_header   s^    '')(C  	@BZZ48FOO\NT->->>?	@ 	@ 	@s   7AA&c                   | j                         }| j                  dk(  r/t        j                  j	                  |      s| j                          | xj                  dz  c_        t        |      D ]&  \  }}t        |t              r|d}n|d}n|}|||<   ( t        |d      5 }t        j                  |d      }|j                  |       d d d        y # 1 sw Y   y xY w)Nr      z.6f ar[   r\   )rX   rJ   ospathexistsre   	enumerate
isinstancefloatr_   r`   ra   rb   )r7   rS   rc   idxorig_valnew_valrd   ra   s           r   rP   zMetricTable._write_row   s    '')!#BGGNN8,Dq &s^ 	MC(E*%cN!"CH	 (C  	!BZZ48FOOC 	! 	! 	!s   $)CCc                .    t        | |      }|t        | <   y r,   )rF   rD   )r/   rI   tables      r   register_tablezMetricTable.register_table   s    D,/). &r   N)r   r   r   r   rJ   rU   rX   re   rP   rC   ru   r   r   r   rF   rF      s@    ONC$5@!( / /r   rF   slow_fusion)kernel1_pathkernel1_latencykernel2_pathkernel2_latencyfused_kernel_pathfused_kernel_latencyslow_down_ratiograph_stats)graph_idnum_nodes_before_fusionnum_nodes_after_fusionpersistent_red_perf)kernel1_namekernel2_namerx   rz   
size_hintsreduction_hintspeedup'fusion_failure_due_to_indexing_mismatch)pre_grad_graph_idpost_grad_graph_id
node1_name
node2_namenode1_debug_strnode2_debug_strcommon_buffer_namesfailure_reasonkernel_metadatakernel_namekernel_pathkernel_categoryr   r   line_of_codenum_load	num_storenum_for_loopnum_atomic_addnum_argsxnumelynumelrnumelkernel_args_num_gbc                    ddl m} ddlm} |j	                  |       } ||      }t        j                  |j                  j                        S )z
    The kernel_module_code is the python module that contains kernel function code.
    kernel function is the proper triton kernel function annotated with
    @triton.jit
    rg   )PyCodeCache)get_triton_kernel)	codecacher   wrapper_benchmarkr   loadinspect	getsourcefn)kernel_module_coder   r   modkernels        r   _parse_kernel_fn_coder     s@     '4


-
.Cs#F VYY\\**r   c                4    t        | j                               S )zJ
    Return the line of code for the kernel excluding the decorators.
    )rM   
splitlines)proper_kernel_fn_codes    r   _parse_kernel_line_of_coder     s     $//122r   c                n    |dk(  ry t        j                  d|       }|sJ d       |j                  d      S )Nforeachzsize_hints=(\[[0-9, ]*\]),zsize_hints missing!rg   researchgroup)r   r   ms      r   _parse_size_hintsr   #  s;    )#
		/1CDA###1771:r   c                l    | dvry t        j                  d|      }|sJ d       |j                  d      S )N)	reductionpersistent_reductionz$reduction_hint=ReductionHint\.(\w*),z/reduction_hint not found in kernel source code!rg   r   )r   r   r   s      r   _parse_reduction_hintr   ,  s<    CC
		9;MNA???1771:r   c                $    | j                  |      S r,   )count)r   patterns     r   _count_patternr   4  s     &&w//r   c                    | j                         d   }|j                  d      sJ |j                  d      }|j                  d      }||dz   | }|j                  d      }t	        |      S )Nr   def (z):rg   ,)r   
startswithindexsplitrM   )r   def_line	start_idxend_idxdecl_csvcompss         r   _count_argsr   8  sm    $//1!4Hv&&&s#InnT"G	A0HNN3Eu:r   c                .    | j                  d      }| |d S )z
    Skip decorators.
    r   N)r   )kernel_fn_code	start_poss     r   _parse_proper_kernel_fn_coder   B  s!     $$V,I)*%%r   c                n    t        j                  | d|       }|rt        |j                  d            S y )Nz
 = ([\d]+)rg   )r   r   r   r   )r   numel_arg_namer   s      r   _parse_numelr   J  s3    
		^$K02GHA1771:r   c                j    t        j                  d|       }|rt        |j                  d            S 	 y)z
    inductor meta looks like:
        inductor_meta={... 'mutated_arg_names': [], 'no_x_dim': False, 'kernel_num_gb': 2.0},
    z.kernel_num_gb.:\s*([0-9.]+)rg   N)r   r   ro   r   )r   r   r   s      r   _parse_kernel_args_num_gbr   R  s5    
 			1>BAQWWQZ  	 r   c           
         	 ddl m}  ||      t        |      t        |      	t	        |      t              t              t        d      j                   	fd       y)z
    An utility to log kernel metadata. We may parse metadata from kernel source code here.

    It's fine to parse the generated kernel code here since the logging is
    disabled by default. It would hurt compilation time.
    rg   )"get_kernel_category_by_source_coder   c                      t        d      t        d      t        d      t        d      t              t        d      t        d      t        d      t               dS )	Nztl.loadztl.storezfor ztl.atomic_addr   r   r   r   )r   r   r   r   )r   r   kernel_line_of_coder   r   r   r   r   s   r   <lambda>z%log_kernel_metadata.<locals>.<lambda>y  s    &&.$,/&'<iH'(=zJ*+@&I,-BOT#$9:"#8(C"#8(C"#8(C";#
 r   N)	r   r   r   r   r   r   r   get_metric_tablerU   )
r   r   r   r   r   r   r   r   r   r   s
   ``  @@@@@@r   log_kernel_metadatar   e  sp     F89KLO*?<NON"#5GJ*+=>N8H 55JK&'//	
 	
r   c                     t         j                         D ]f  \  } }| t               v s|j                         }t        j
                  j                  |      rt	        j                  |       |j                          h y)z
    Purge the old log file at the beginning when the benchmark script runs.
    Should do it in the parent process rather than the child processes running
    each individual model.
    N)	rD   itemsrL   rX   rj   rk   rl   unlinkre   )r/   rt   rc   s      r   purge_old_log_filesr     sb     0557 !e(**,,.Hww~~h'		(# !r   c                     t         j                  } t               }| j                  d      D ]9  }|j	                         }|s|t
        v sJ d| d       |j                  |       ; |S )Nr   zMetric table name z is not registered)r   rL   rN   r   striprD   add)
config_strenabledr/   s      r   rL   rL     su    --JeG  % zz|,,	9v%78	9,D Nr   c                    | t               v S r,   )rL   r/   s    r   is_metric_table_enabledr     s    (***r   c                :    | t         v sJ d|  d       t         |    S )NzMetric table z is not defined)rD   r   s    r   r   r     s*    ++R}TF/-RR+#D))r   )rA   zSet[str])8
__future__r   r`   r-   r   rj   r   r   	functoolsr   typingr   r   r   r	   r
   torch._inductorr   torch._inductor.utilsr   torch._inductor.schedulerr   r   r   r    r   r   r   r"   r#   r   r   r$   r%   r&   r'   r)   r1   r3   rD   rF   ru   r   r   r   r   r   r   r   r   r   r   r   rL   r   r   r   r   r   <module>r      s   " 
   	 	 !  8 8 " 4 ;  !"       8:4 9     ! ! ! CE !#? D ,- ) 0 3 3 3M8 82 46 0 5 8/ 8/ 8/v            -	   6+ 30&&'T!  +*r   