
    ɯwgì                      U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dlm
Z
 d dlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlZd dlm Z m!Z!m"Z" d dl#m$c m%c m&Z' d dl(Z)d dl*Z)d dl+m,c m-Z. d d	l/m0Z0 d d
l1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z:m;Z;m<Z<m=Z=m>Z> d dl?m@Z@ d dlAmBZBmCZCmDZDmEZEmFZFmGZGmHZH d dlImJZJ d dlKmLZLmMZMmNZN d dlOmPZP ddlQmRZRmSZS ddlTmUZUmVZV ddlSmWZWmXZXmYZYmZZZ ddl[m\Z\ ddl]m^Z^m_Z_ ddl`maZa ddlbmcZc ddl,mdZdmeZemfZfmgZgmhZhmiZimjZjmkZkmlZlmmZmmnZnmoZompZpmqZq ddlrmsZsmtZtmuZu erddlvmwZw  ed      Zx ed       Zy ed!      Zzee{e f   Z|d"e}d#<    ej                  e      Z ej                  ej                  d$%      Ze)j                  j                  Z	 ee{d&eed&f   d'd(eeee{eed&f   d&d'd(f         f   Zd"e}d)<   dd*Zdd+Zdd,Zdd-Z	 	 	 	 	 	 dd.Zg d/Zg d0Z	 	 	 	 dd1Zdd2Zeddd4       Zeddd5       Z	 d	 	 	 	 	 dd6Z	 	 	 	 dd7Zdd8Zdd9Zdd:Z G d; d(      Zej(                   G d< d=             Zej(                   G d> d?e             Zdd@Z G dA dBe      Zej(                   G dC dDe             Z edE       edF       edG       edH       edI       edJ      dKZdLe}dM<   	 d	 	 	 	 	 	 	 ddNZ	 	 	 	 	 	 	 	 ddOZej(                   G dP dQe             Z G dR dSe      Zej(                   G dT dUe             Zej(                   G dV dWe             Zej(                   G dX dYe             ZddZZdd[Z	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 dd\Z ej                  ed3]      Z	 	 	 	 	 	 dd^Zej(                   G d_ d`e             Zej(                   G da dbe             Zej(                   G dc dde             Z G de dfe      Zej(                   G dg dhe             Zej(                   G di dje             Zej(                   G dk dle             Zej(                   G dm dne             Z G do dpe      Z G dq dre      Zej(                   G ds dte             Zej(                   G du dve             Z	 	 	 	 	 	 ddwZddxZej(                   G dy dze             Z G d{ d|e      Z G d} d~e      Z G d de      Z G d de      Z G d de      Zej(                   G d de             Zej(                   G d dee             Z G d de      Z G d de      Z G d de      Z G d de      Zej(                   G d de             Z G d de      Z G d de«      Zee{eeeeee{eeef      f   Z G d d      Z G d deǫ      Z G d deë      Z G d de«      Z G d de«      Zej(                   G d de             Z G d de̫      Z G d deͫ      Zej(                   G d de̫             Zej(                   G d deϫ             Z G d deЫ      Z G d deϫ      Z G d de      Z G d deϫ      Z G d deϫ      Z G d deϫ      Z G d deϫ      Z G d de׫      Z G d deҫ      Z G d deϫ      Z G d deϫ      Z G d deЫ      Z G dÄ deϫ      Z G dń deϫ      Zej(                   G dǄ dȫ             Z eJej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  g      Z G dɄ deҫ      Zej(                   G d˄ de             Zej(                   G d̈́ de             Z G dτ deϫ      Zej(                   G dф de             Z G dӄ d&e      Z G dԄ de      Zej(                   G dք de             Zdd؄Zej(                   G dل deϫ             Zej(                   G dۄ deϫ             Z G d݄ de      Zej(                   G d߄ de             Z G d de      Z  G d de       ZddZy)    )annotationsN)nullcontext)partial)AnyCallableClassVarContextManagerDictIterableListLiteralOptionaloverloadSequenceTupleTYPE_CHECKINGTypeVarUnion)	TypeAlias)patch)ExprIntegerSymbol)get_interface_for_device)identity)GraphModuleSerializer)can_auto_functionalize)metrics)compute_required_storage_lengthis_boolean_dtypeis_float_dtypemake_channels_last_strides_for
StrideType)get_schema_info)CallMethodKeycompute_unbacked_bindingsDivideByKeyfree_unbacked_symbolsrebind_unbackedresolve_unbacked_bindingsSymTypes
OrderedSet)CleanDivFloorDivModularIndexing)SymT   )configdependencies)BackendFeatureindex_prevent_reordering)extract_free_unbacked_symbols#extract_input_node_reduction_rangesextract_read_writesvar_builder)LoopBody)OpCounterCSEOpCountResult)benchmarker)ReductionHint)argsortcache_on_selfceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningget_kernel_metadata
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_subs)opsOpsValueV)GraphLowering_T_U_Vr   _IntLikez  prefix	TensorBoxr   IRNode_NodeOrNodesc                "    dfd |        y )Nc                   | y t        | t        t        f      r| D ]
  } |        y t        | t              r| j	                         D ]
  } |        y t        | t
        j                  j                  j                  t        t        t        t        j                  j                  j                  t         t"        t$        f      sJ dt'        |        d       y )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])
isinstancelisttupledictvaluestorch	_inductorir
ExpandViewDynamicScalarAssertScalarrX   sympylogicboolalgBooleanr   intEffectfulKerneltype)nodesnode_check_tensorboxs     W/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/torch/_inductor/ir.pyrq   z%validate_ir.<locals>._check_tensorbox   s     =e}- ' &'t$ ' &' OO&&11! KK''//#	 k U}$ijk     )ro   Optional[_NodeOrNodes]returnNone )node_or_nodesrq   s    @rr   validate_irry      s    k6 ]#rs   c                8     t         t              sJ d fd}|S )Nc                 0     t        t              | i |S N)getattrrN   )argskwargsnames     rr   fnzops_wrapper.<locals>.fn   s    !wsD!42622rs   )r~   objectr   r   ru   rO   )r]   str)r   r   s   ` rr   ops_wrapperr      s    dC   3 Irs   c           
     b    t        t        | t        t        |                         dfd}|S )Nc                    t        |       t              k(  sJ t        t        |             D cg c]
  }| |       c}S c c}w r|   lenrange)indexi	inv_orders     rr   reindexz inverse_reorder.<locals>.reindex   s?    5zS^+++-23u:->?il#???   Ar   Sequence[_T]ru   r   )r`   zipr   r   )orderr   r   s     @rr   inverse_reorderr      s*    Sc%j 123I@ Nrs   c                     d fd}|S )Nc                    t        |       t              k(  sJ t        t        |             D cg c]
  }| |       c}S c c}w r|   r   )r   r   r   s     rr   r   zsame_reorder.<locals>.reindex   s>    5zSZ''').s5z):;AeAh;;;r   r   rw   )r   r   s   ` rr   same_reorderr      s    < Nrs   c                     d fd}|S )Nc                       |             S r|   rw   )r   reindex1reindex2s    rr   r   z fuse_reindexing.<locals>.reindex   s    ((rs   )r   r   ru   zSequence[_V]rw   )r   r   r   s   `` rr   fuse_reindexingr      s    ) Nrs   )   r      r2   )   r   r   r   r2   c                    t        |       D ci c]  \  }}||
 }}}t        t        |             D cg c]  }||   	 }}|S c c}}w c c}w )z
    Convert stride order to fill order
    For channel last format,

    stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
    )	enumerater   r   )r   idxposlookupr   
fill_orders         rr   stride_order2fill_orderr      sR     (1'7883c3h8F8%*3u:%67&)7J7 97s
   AAc                    t        |       }t        t        |             D cg c]  }d }}t        |      D ]
  \  }}|||<    |S c c}w )z)
    Convert strides to stride order
    r   )r@   r   r   r   )seq
sorted_idx_outr   elems         rr   get_stride_orderr      sS     $CLJCHo
&1
&C
&Z( 4D	J 's   	ATc                     y r|   rw   xguard_shapes     rr   ir_node_to_tensorr         rs   c                     y r|   rw   r   s     rr   r   r   
  r   rs   c                   | y |s%t         j                  j                  j                  }nt        }| j                         D cg c]
  } ||       }}t        |       r.| j                         j                  D cg c]
  } ||       }}nt        j                  |      }| j                         }| j                         }t        |      }t        |      }t         j                  j                  j                  j                         5  t!        j"                  ||||      j%                         }d d d        |S c c}w c c}w # 1 sw Y   S xY w)N)sizestridedtypedevice)rP   graphsizevars	size_hintr   get_sizeis_storage_and_layout
get_layoutr   FlexibleLayoutcontiguous_strides	get_dtype
get_devicerD   	shape_envsuppress_guardsrb   empty_stridedzero_)	r   r   shape_fnsr   r   r   r   ts	            rr   r   r     s    	y 77##--!".AHQK.D.Q'(||~'<'<=!(1+==2248KKME\\^F"4(D$V,F	
			#	#	3	3	5 fE&

%' 	
 H / > Hs   D<;E
(EEc                0    t        | t              r| sd gS | S r|   )r]   r^   values    rr   may_convert_to_optionalr   +  s     %u vLrs   c                    t        | dd       x}rt         |             S t        | t        j                        r| j
                  S y )Nr   )r}   get_device_typer]   rb   r   rn   )r   r   s     rr   r   r   5  s=    Qd33z3z|,,!U\\"vvrs   c                H    t        |       }t        |xr t        |            S r|   )r   boolrH   )r   r   s     rr   	is_tritonr   =  s     AE'&-((rs   c                    t        |       dk(  S Ncpu)r   r   s    rr   is_cpur   B  s    1&&rs   c                  >   e Zd ZU  e       Zded<   eej                  d*d              Z	d Z
d+dZd Zd Zd,dZd-d	Zd
 Zd Zd Zed        Zd Zd Zd Zd.dZded<   ded<   ded<   ded<   ded<   ded<   ded<   ded <   d!ed"<   d!ed#<   d$ed%<   d&ed'<   d(ed)<   y)/rY   zClassVar[OrderedSet[Any]]_current_originsc              #     K   t         j                  }|| z  t         _        	 d  |t         _        y # |t         _        w xY wwr|   )rY   r   )originsolds     rr   current_originszIRNode.current_originsI  s7      %%"%-	*&)F#cF#s   A2 A?Ac                    t        | j                        | _        t        j                  rt        j                         | _        y d | _        y r|   )r-   r   r   r3   debug_ir_traceback	tracebackformat_stackselfs    rr   __post_init__zIRNode.__post_init__S  s3    !$"7"785;5N5N//1TXrs   c                0    t        dt        |              )NzNYI on NotImplementedErrorrn   r   s    rr   get_read_nameszIRNode.get_read_namesW  s    !GDJ<"899rs   c                    | j                   S r|   )r   r   s    rr   get_tracebackzIRNode.get_tracebackZ  s    ~~rs   c                    t         r|   r   r   s    rr   get_defining_opzIRNode.get_defining_op]      !!rs   c                X    dt        | dd       }|rt        |      dkD  r|d d  d}|gS )Nzorigins=r    @   =   z...)r}   r   )r   shortenr   s      rr   common_reprzIRNode.common_repr`  s@    WT9b9:;s7|b( "c*Gyrs   c                    || j                  |      z   }t        t        t        |            }|r5t	        dj                  |            }t        |       j                   d| dS t        |       j                   d| dS )Nz,
z(
z
)())r   r^   mapr   indentjoinrn   __name__)r   linesr   	multiline	new_liness        rr   
str_helperzIRNode.str_helperg  sy    ((11Se_%uzz%01I4j))*#i[<<4j))*!E7!44rs   c                    | j                   S r|   r   r   s    rr   r   zIRNode.get_dtypep  s    zzrs   c                2    t        dt        |        d      )Nz#get_layout() is not implemented by !r   r   s    rr   r   zIRNode.get_layouts  s    !$GT
|ST"UVVrs   c                2    t        dt        |        d      )Nz!get_size() is not implemented by r  r   r   s    rr   r   zIRNode.get_sizev  s    !$Ed4j\QR"STTrs   c                "    | j                         S r|   r   r   s    rr   shapezIRNode.shapey  s    }}rs   c                4    t        | j                               S r|   )rL   r   r   s    rr   	get_numelzIRNode.get_numel}  s    T]]_--rs   c                    t         j                  j                  j                  t	        j
                  | j                         d            S Nr   rP   r   r   is_expr_static_and_truerh   Eqr	  r   s    rr   is_zero_elementszIRNode.is_zero_elements  0    ww77AQST8UVVrs   c                0    t        dt        |              )a)  
        If the IRNode refers to data which has not been materialized (e.g.,
        it is a Pointwise/Reduction that could potentially have more
        compute fused into it), realize the IRNode into physical memory,
        ending the possibility of fusing into it, but allowing, e.g., multiple
        users to access the data without having to recompute.

        Check StorageBox.realize for a particularly notable implementation.

        TODO(ezyang): I think, in principle, every IRNode should have an
        implementation of this, and most of the time no-op is OK, but you
        really do have to audit each IRNode for this, so for now, raise
        an error if it's not implemented.  Note that some code in graph.py
        will catch this thrown error and suppress it with a warning.
        zrealize NYI on r   r   s    rr   realizezIRNode.realize  s      "ODJ<"@AArs   Nc                0    t        dt        |              )Nzcodegen_reference NYI on r   r   writers     rr   codegen_referencezIRNode.codegen_reference  s    !$=d4j\"JKKrs   zCallable[[], torch.device]r   torch.dtyper   zCallable[[], str]get_namezCallable[[], Any]	get_readszCallable[[], int]	num_reads
get_strideget_storage_numelzCallable[[], bool]has_exceeded_max_readsz"Callable[[], Callable[[Any], Any]]make_loadermake_indexerzCallable[[int], None]
mark_reusezCallable[[], None]realize_hintz&Callable[[], OrderedSet[sympy.Symbol]]get_unbacked_symbol_uses)r   zOrderedSet[torch.fx.Node]ru   zOrderedSet[str]T)TTr|   )r   
__module____qualname__r-   r   __annotations__staticmethod
contextlibcontextmanagerr   r   r   r   r   r   r   r   r   r   propertyr  r	  r  r  r  rw   rs   rr   rY   rY   F  s    2<,/>*  *Y:"5WU  .WB$L +*    !!((..3344%%$$DDrs   c                  p    e Zd Zd Zd Zd Zd ZddZd Zd Z	d Z
d	 Zdd
Zd ZddZddZddZd Zy)	Operationc                    d | _         y r|   operation_namer   s    rr   r   zOperation.__post_init__  s
    -1rs   c                    t         r|   r   r   s    rr   r   zOperation.get_device  r   rs   c                6    t        | d      sJ | j                  S )Norigin_node)hasattrr3  r   s    rr   get_origin_nodezOperation.get_origin_node  s    t]+++rs   c                6    t        | d      sJ | j                  S )Nr   )r4  r   r   s    rr   get_originszOperation.get_origins  s    tY'''||rs   c                6    | j                   J | j                   S r|   r/  r   s    rr   get_operation_namezOperation.get_operation_name  s     ""..."""rs   c                     yNFrw   r   s    rr   	is_externzOperation.is_extern      rs   c                     yr;  rw   r   s    rr   is_no_opzOperation.is_no_op  r=  rs   c                    t         r|   r   r   s    rr   get_read_writeszOperation.get_read_writes  r   rs   c                &    || j                         v S r|   )r   )r   r   s     rr   
is_user_ofzOperation.is_user_of  s    t**,,,rs   c                B    t        d | j                         D              S )Nc              3  4   K   | ]  }|j                     y wr|   r   ).0deps     rr   	<genexpr>z+Operation.get_read_names.<locals>.<genexpr>  s     ?s#((?s   )r-   r  r   s    rr   r   zOperation.get_read_names  s    ?dnn.>???rs   c                6    | j                         j                  S r|   )rA  readsr   s    rr   r  zOperation.get_reads  s    ##%+++rs   c                    t         r|   r   r   s    rr   get_outputszOperation.get_outputs  r   rs   c                    t               S r|   r,   r   s    rr   get_unbacked_symbol_defsz"Operation.get_unbacked_symbol_defs  
    |rs   c                    t               S )a  
        Returns the unbacked symbols which are required to be in scope in
        order to successfully perform codegen for this buffer.  For example,
        a buffer that corresponds to an extern kernel call that takes i0 as
        an argument would return {i0} here.  This is used to generate necessary
        dependencies that ensure we actually bind i0 in codegen before you
        try to use it.

        Note that this is NOT transitive; in particular, if this buffer takes
        in as input another buffer with dynamic shape (e.g., (i0,)), we will
        not report it here, because you will already have a dependency
        on that buffer, which will eventually have a dependency on i0 if
        necessary.
        r,   r   s    rr   r"  z"Operation.get_unbacked_symbol_uses  s     |rs   c                     y)z
        Gets extra global memory size needed by this buffer.
        Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
        r   rw   r   s    rr   get_workspace_sizezOperation.get_workspace_size  s    
 rs   Nru   r   r#  ru   List[Buffer]ru   zOrderedSet[sympy.Symbol])r   r%  r&  r   r   r5  r7  r9  r<  r?  rA  rC  r   r  rM  rO  r"  rS  rw   rs   rr   r-  r-    sN    2" #"-@,""rs   r-  c                      e Zd ZU ded<   ded<   ded<   ded<   dd	Zdd
Z fdZeZd Zd Z	d Z
d Zd Zed        Zeej"                  fd       Zed d       Zd Zed        Zd Zd Zd Zd!dZd Zd Zd Zd Z xZS )"Loopstorch.devicer   r  r   Callable[..., Any]inner_fn
List[Expr]rangesc                x     t               j                  g d | j                  D        | j                          S )Nc              3  2   K   | ]  }t        |        y wr|   r(   rG  es     rr   rI  z1Loops.get_unbacked_symbol_uses.<locals>.<genexpr>  s     <1#A&<   )r-   unionr^  inner_fn_free_unbacked_symbolsr   s    rr   r"  zLoops.get_unbacked_symbol_uses  s;    !z|!! 
<<
//1
 	
rs   c                   | j                  d| j                  j                   dt        | j                        | j                         g|D cg c]  }| dt        | |        c}z   d| j                  gz         S c c}w )N'=origin_node=)r   r   rn   r   r   inner_fn_strr}   r3  )r   namesr   s      rr   __str__zLoops.__str__  s    DKK$$%Q'DJJ!!#
 <AA4$qt,-.AB d..1234
 	
 Bs   A?
c                0    t         |           d | _        y r|   superr   r3  r   	__class__s    rr   r   zLoops.__post_init__      rs   c                    | j                   S r|   r   r   s    rr   r   zLoops.get_device      {{rs   c                    | j                   S r|   r3  r   s    rr   r5  zLoops.get_origin_node      rs   c                    | j                   S r|   r^  r   s    rr   r   zLoops.get_size  rv  rs   c                    | j                   S r|   r{  r   s    rr   get_pointwise_sizezLoops.get_pointwise_size  rv  rs   c                     yr;  rw   r   s    rr   r<  zLoops.is_extern  r=  rs   c                    |j                  dd       }|j                  dd       } | |i |}||_        t        j                  r|xs t	        j
                         nd |_        t        j                  |      S )Nr3  r   )popr3  r3   r   r   r   rX   create)clsr~   r   r3  tbrs         rr   r  zLoops.create  sm    jj5ZZT*  #.4.G.GB*)((*T 	
 ""rs   c                    t        |       D cg c]+  \  }}|dk(  rt        j                  d      nt        ||      - c}}S c c}}w Nr2   r   )r   rh   r   rK   )r^  rW   nr   s       rr   _indexzLoops._index'  sJ     "&)
1 !"QEMM!,J6ST,UU
 	
 
s   0Ac                `   t        t        j                               }t        j                  |      5  t	        j
                  t        dd      5   | j                  | j                           |j                         cd d d        cd d d        S # 1 sw Y   nxY wd d d        y # 1 sw Y   y xY wNallow_indexingT)
r<   rP   MockHandlerset_ops_handlerr   r   r   r\  inner_fn_argsgetvalue)r   	opcounters     rr   inner_fn_opcountzLoops.inner_fn_opcount.  s     1	y) 	(5<<,d,
 	( DMM4--/0%%'		( 	( 	( 	( 	( 	( 	(s#   B$-B<	B$B	B$$B-c                :    | j                  | j                        fS r|   )r  r^  r   s    rr   r  zLoops.inner_fn_args7  s    DKK(**rs   c                r    t        j                  j                  | j                  g| j	                          S r|   )rP   KernelFormatterHandlerir_to_stringr\  r  r   s    rr   rk  zLoops.inner_fn_str:  s3    ''44MM
 ..0
 	
rs   c                X    | j                         j                  t        j                  kD  S r|   )r  num_opsr3   realize_opcount_thresholdr   s    rr   has_large_inner_fnzLoops.has_large_inner_fn@  s"    $$&..1Q1QQQrs   c                d    | j                  | j                        }t        | j                  |      S r|   )r  r^  r7   r\  )r   r   s     rr   rf  z$Loops.inner_fn_free_unbacked_symbolsC  s%    DKK(,T]]EBBrs   c                |   t        j                  t        dd      5  | j                         rJt	        | j                         | j                         | j                               j                  cd d d        S t	        | j                         | j                               j                  cd d d        S # 1 sw Y   y xY wr  )	r   r   r   get_reduction_typer9   r  r   get_reduction_sizerK  r   s    rr   r  zLoops.get_readsG  s    \\.*:DA 	&&(*$$&MMO++- %	 	 +$$&MMO %	 	 	s   AB271B22B;c                H    t        | j                         j                        S r|   )r-   r  read_buffersr   s    rr   r   zLoops.get_read_namesU  s    $//1>>??rs   c                H    t        | j                         j                        S r|   )r   r  r  r   s    rr   r  zLoops.num_readsX  s    4((*7788rs   c                2    t        dt        |        d      )Nz+get_reduction_size() is not implemented by r  r   r   s    rr   r  zLoops.get_reduction_size[      !9$t*QG
 	
rs   c                2    t        dt        |        d      )Nz+get_reduction_type() is not implemented by r  r   r   s    rr   r  zLoops.get_reduction_type`  r  rs   c                2    t        dt        |        d      )Nz+constant_to_device() is not implemented by r  r   r   r   s     rr   constant_to_devicezLoops.constant_to_devicee  r  rs   rW  )r{  )ru   r=   r#  ) r   r%  r&  r'  r"  rm  r   __repr__r   r5  r   r}  r<  classmethodr  r(  r1   INDEXr  rA   r  r  rk  r  rf  r  r   r  r  r  r  __classcell__rr  s   @rr   rY  rY    s      
	
  H  # # "jj 
 
 ( (+ 
 

RC@9




rs   rY  c                   |j                   rt        j                  t        d      |      S t        j                  d|      S )Nnanr   )is_floating_pointrN   constantfloat)r   r   s     rr   nop_loader_fnr  k  s1    ||E%L%00||Au%%rs   c                  *    e Zd Zd Zd Zd Zd Zd Zy)	Pointwisec                p    | j                         rt        t        | j                        S | j                  S Nr  )r  r   r  r   r\  r   s    rr   r  zPointwise.make_loaders  s)      "=

;;}}rs   c                    g S r|   rw   r   s    rr   r  zPointwise.get_reduction_sizez  s    	rs   c                     y r|   rw   r   s    rr   r  zPointwise.get_reduction_type}      rs   c                h    | j                         }t        j                  | ||       ||            S r|   )r  rN   storer   output_nameindexervarsloaders        rr   store_outputzPointwise.store_output  s+    !!#yygdmVD\BBrs   c                    | j                         } t        j                  t        d|      |      }t	        || j
                  || j                        S FMove this to a given device. Requires that all reads are to constants.override_device)r  r   r   ConstantBufferr  r   r^  r   r   r  s      rr   r  zPointwise.constant_to_device  sC    !!#Hn.?HPVT[[AArs   N)r   r%  r&  r  r  r  r  r  rw   rs   rr   r  r  r  s    CBrs   r  c                  2    e Zd ZU ded<   dZded<   d Zd Zy)ScatterzCallable[[List[Expr]], Expr]output_indexerNOptional[str]scatter_modec                    | j                         } t        j                  t        d|      |      }t	        || j
                  || j                  | j                  | j                        S r  )	r  r   r   r  r  r   r^  r  r  r  s      rr   r  zScatter.constant_to_device  s]    !!#Hn.?HPJJKK
 	
rs   c                    | j                         }t        j                  | || j                  |             ||      | j                        S )N)mode)r  rN   r  r  r  r  s        rr   r  zScatter.store_output  sG    !!#yyD''-.4L""	
 	
rs   )r   r%  r&  r'  r  r  r  rw   rs   rr   r  r    s    00"&L-&

rs   r  
logical_ormaximumminimummuladdbitwise_xor)anymaxminprodsumxor_sumz"Dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNc                      t         v r	t             S  dv r	 	 	 	 	 	 d fd}|S  dk(  r	 	 	 	 	 	 dd}|S t        d        )Nargmaxargminc                   | \  }}|\  }}dk(  rt        j                  ||      }nt        j                  ||      }t        j                  ||      }t	              rt        j
                  ||      }t        j
                  ||      }	t        j                  |t        j                  ||	            }t        j                  |t        j                  ||	            }rt        j                  ||      nt        j                  ||      }
t        j                  |t        j                  ||
            }t        j                  |||      t        j                  |||      fS )Nr  )	rN   ltgteqr!   ner  logical_andwhere)aba_valuea_indexb_valueb_indexmaskequala_isnanb_isnantiearg_break_ties_leftr   reduction_types              rr   argmax_combine_fnz3get_reduction_combine_fn.<locals>.argmax_combine_fn  s     !GW GW)vvgw/vvgw/FF7G,Ee$&&'2&&'2~~dCFF7G,DEucoogw.OP ' w(VVGW- 
 >>$s(CDD		$1		$1 rs   welford_combinec                l    | \  }}}|\  }}}||z
  }||z   }	||	z  }
|||
z  z   ||z   ||z  |z  |
z  z   |	fS r|   rw   )r  r  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              rr   welford_combine_fnz4get_reduction_combine_fn.<locals>.welford_combine_fn  sm     &'"FD(%&"FD(VOE!H,J :-I**teemh6BB rs   zunknown reduction_type=)r  Tuple[object, object]r  r  ru   zTuple[OpsValue, OpsValue])r  #Tuple[OpsValue, OpsValue, OpsValue]r  r  ru   r  )r  r   )r  r   r  r  r   s   ```  rr   get_reduction_combine_fnr    s     --#N33	/	/	$	)>	&	: ! 	,	,	2	2	 1	  "! "$;N;K"LMMrs   c                   t        |      D cg c]5  \  }}t        j                  j                  j	                  |d      dk7  r|7 }}}|D cg c].  }t        j                  j                  j	                  | |         0 } }|D cg c].  }t        j                  j                  j	                  ||         0 }}| |k(  S c c}}w c c}w c c}w )zP
    Returns true if the strides are equal, ignoring dimensions of size 1 .
    r   fallbackr2   )r   rP   r   r   r   )strides1strides2r   r   dimnon_1_indicess         rr   significant_strides_equalr    s      oAs77%%cA%6!; 	
M 
 BOOA  **8A;7OHOANOA  **8A;7OHOx
 POs   :C3C		3Cc                  
    e Zd ZU ded<   ded<   ded<   ded<   dd	Zdd
Zd  fdZd Zd Zd Z	d Z
d Zd Zd Ze	 d!	 d"d       Zed        Zeej&                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d#d       Zed        Zed        Ze	 	 	 	 	 	 	 	 d$d       Zed        Zed        Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d%d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d&d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'd       Z xZS )(	Reductionr]  reduction_rangesr   r  r  	src_dtyper?   reduction_hintc                0    t         j                  | d      S )N)r^  r  r  )rl  )rY  rm  r   s    rr   rm  zReduction.__str__  s    }}H  
 	
rs   c                "    | j                         S r|   )rm  r   s    rr   r  zReduction.__repr__  s    ||~rs   c                r    t         |           t               j                  d | j                  D         z  S )Nc              3  2   K   | ]  }t        |        y wr|   ra  rb  s     rr   rI  z5Reduction.get_unbacked_symbol_uses.<locals>.<genexpr>  s     F1#A&Frd  )rp  r"  r-   re  r  rq  s    rr   r"  z"Reduction.get_unbacked_symbol_uses  s8    w/14FJL4F4FF0E0EF5
 
 	
rs   c                    | j                   S r|   )r  r   s    rr   r  zReduction.get_reduction_size  s    $$$rs   c                    | j                   S r|   )r  r   s    rr   r  zReduction.get_reduction_type      """rs   c           	         t        j                  | j                  | j                  | j                  | j                  ||            }t        j                  | ||      |      S r|   )rN   	reductionr   r  r  r\  store_reduction)r   r  r  r  reduction_varsr   s         rr   r  zReduction.store_reduction  sP    JJNNMM$/	
 "";uEErs   c                X    t        | j                        t        | j                        z   S r|   )r   r^  r  r   s    rr   index_lengthzReduction.index_length$  s!    4;;#d&;&;"<<<rs   c                    | j                  | j                        }| j                  | j                  t        j                        }||fS r|   )r  r^  r  r1   RINDEXr   r   rindexs      rr   r  zReduction.inner_fn_args'  s8    DKK(T22DKK@vrs   c                    | j                  | j                        }| j                  | j                  t        j                        }t        | j                  ||      S r|   )r  r^  r  r1   r  r7   r\  r   s      rr   rf  z(Reduction.inner_fn_free_unbacked_symbols,  sA    DKK(T22DKK@,T]]E6JJrs   c           
        | j                         } t        j                  t        d|      |      }t	        || j
                  || j                  | j                  | j                  | j                  t        j                        S r  )r  r   r   r  r  r   r^  r  r  r  r?   DEFAULTr  s      rr   r  zReduction.constant_to_device1  sm    !!#Hn.?HPJJKK!!NN!!	
 		
rs   Nc	           
        !"#$% d }	t         j                  j                  j                  |      }
t         j                  j                  j                  t	        |            }t         j                  j                  | t        j                         xr* |dvxr$ t        j                  xr  |	|
      xr  |	|      }|st        j                  dfS t        t        |             }|j                  j                  |       }t        |       dk(  r|j                   $n|j"                  $d#d!d%#$z  %z  "!$z  %z    !"#$%fd} !"#$%fd	}|dk(  r ||
|      }|dk(  rt        j$                  |fS |t'        |t(              rxt+        |      \  }}|h|ft         j                  j                  j                  t	        ||z               }|
|k(  r,t,        j/                  d
|||||       t        j$                  dfS t        j$                  |fS |
#k  s|$dz  dz  k\  rt        j                  dfS t1        | ||||||t        j                        }d } ||      \  }}|r ||      \  }}t3        |      dk(  rt        j                  dfS t5        j6                  |j9                         |j;                               \  \  }}}d}d}|D ]  }t         j                  j                  j=                  ||      }t         j                  j                  j?                  |||jA                               }tC        d |D              }|r|dz  }|dz  } ||kD  rt        j$                   ||
|      fS t        jD                   ||
|      fS )Nc                B    t        | t        t        j                  f      S r|   r]   rl   rh   r   r   s    rr   
_is_staticz(Reduction.num_splits.<locals>._is_staticL  s    a#u}}!566rs   r  r2   xpu       i   c                  	 d}d|z  }|dz  k\  ry| dk  ry| |z  k  r}n| |z  
k  rmz  d|z  z  }||z   dz
  |z  }| ||z  z   dz
  ||z  z  	t        j                  |       }t        |	fd      }t        |	z
        dk  rt	        |      }n>	}n;t        j                  |       }t        |fd	      }t        |z
        d
k  r|}n}| ||z  z   dz
  ||z  z  S )N   r*  r   r2   i    c                     t        | z
        S r|   absr   tmp_split_sizes    rr   <lambda>zFReduction.num_splits.<locals>.inner_reduction_splits.<locals>.<lambda>      c!n:L6M rs   key   c                     t        | z
        S r|   r/  r   max_elements_per_threads    rr   r3  zFReduction.num_splits.<locals>.inner_reduction_splits.<locals>.<lambda>      c!>U:U6V rs   2   rh   divisorsr  r0  r  )reduction_numel_hint
numel_hint	num_warpsnum_threads
split_sizetarget_blocksblocks_per_outputr>  closestr2  max_elements_per_devicer:  min_elements_per_devicemin_elements_per_threadnum_smthreads_per_sms            @rr   inner_reduction_splitsz4Reduction.num_splits.<locals>.inner_reduction_splitso  s8    Iy.KQZ'#t+#j04KK4
%
25LL & 7AO L%2Z%?!%C
$R!(;9J+JJQN!$55"7 !>>*>?h,MNw/025!$W.E!FJ!/J >>*>?h,VWw!889B>!(J!8J(:+CCaG[( rs   c                   d}|dz  }d}d}||z   dz
  |z  }| |z  k  r}n| |z  k  rjz  |z  }||z   dz
  |z  }| ||z  z   dz
  ||z  z  t        j                  |       }	t        |	fd      }
t        |
z
        dk  rt	        |
      }n>}n;t        j                  |       }	t        |	fd	      }
t        |
z
        d
k  r|
}n}| ||z  z   dz
  ||z  z  S )Nr-  r*  r      r2   c                     t        | z
        S r|   r/  r1  s    rr   r3  zFReduction.num_splits.<locals>.outer_reduction_splits.<locals>.<lambda>  r4  rs   r5     c                     t        | z
        S r|   r/  r9  s    rr   r3  zFReduction.num_splits.<locals>.outer_reduction_splits.<locals>.<lambda>  r;  rs   r<  r=  )r?  r@  rA  rB  rvals_per_threadxvals_per_blockxblocksrC  rD  r>  rF  r2  rG  r:  rH  rI  rJ  rK  s              @rr   outer_reduction_splitsz4Reduction.num_splits.<locals>.outer_reduction_splits  s8    I#b.K !O!O3a7OKG#j03JJ4
%
25LL & 7K H!.!81!< H(+;m+KKaO&6"8 !>>*>?h,MN~/025!$W.E!FJ!/J >>*>?h,VWw!889B>!(J!8J(+;j+HH1L :- rs   zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %sr   c           	        t        d t        | j                         | j                         | j	                               |       }|j                         }|j                  D  cg c]8  } t        | t        j                        rt        | t        j                        s| : }} g }d}t        |j                  d       D ]  t        fd|D              s|j                  j                         j                   t"        j$                  j&                  v sZt"        j$                  j&                  j                      }|j(                  j*                  }|j-                          |j(                  j*                  |k7  sd} ||fS c c} w )Nr   r   r   r   layoutdataFc                    | j                   S r|   rF  r   s    rr   r3  z@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>  s
    aff rs   r5  c              3  N   K   | ]  }|j                   j                  v   y wr|   )r   free_symbols)rG  r  mds     rr   rI  zAReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>  s      FaqBHH111Fs   "%T)ComputedBufferr   r   r   r   rA  
range_varsr]   rh   r   NumbersortedrK  allappendr   r   rP   r   name_to_bufferrZ  r   decide_layout)	r  cbread_writesra  indiceschangedbuforiginal_strider_  s	           @rr   get_read_indicesz.Reduction.num_splits.<locals>.get_read_indices  sA   %<<>++-
 B ,,.K %//a,Z5<<5P J 
 GG[..4DE +F:FFNN288,ww!''"8"88gg44RWW=*-***;*;))+::,,?&*G+ G##!s   $=Fr   c              3  &   K   | ]	  }|d kD    ywr2   Nrw   rG  r   s     rr   rI  z'Reduction.num_splits.<locals>.<genexpr>  s     /!A/   )#rP   r   r   symbolic_hintrL   has_featurer5   REDUCE_TO_SINGLE_ELEMENTr3   split_reductionsr?   r$  r   r   Workerget_device_propertiesgpu_subslice_countmulti_processor_countINNERr]   rX   r8   logdebugr  r   r4   index_vars_squeezer   r  simplify_with_rangesstride_hintskeysrd  OUTER)&r   	dst_dtyper  r\  r^  r  r  reduction_numel
input_noder(  r?  r@  should_splitdevice_interfacedevice_propertiesrL  rU  split
new_rangesnew_reduction_rangesextracted_numel_hintr  rn  rj  rk  r   r  	num_outer	num_innerr   stridesouterrG  r:  rH  rI  rJ  rK  s&                                   @@@@@@rr   
num_splitszReduction.num_splits@  s   	7  !ww//==oNWW%%33M&4IJ
 ##FN,S,STT 	'	' ''	' /0	' :& 	  ((!++3OF4KL,33II&Q6"e+&99F '<<F"$"%"9F"B^"S"9F"B^"S"	 "	H!	 !	H ?*+?LEz$**E11%*Z*K3V40
0 ).B.N+,77+;+;+I+I%j3G&GH,( ,/CC		G #,!&0	  -22B66 &&-- $;;VaZ"_, ((!++!!	
	$@ ,A.)!,JGQw<1 ((!++&2&E&EJJL!..0'
#NV 		 	A  55a@Agg&&33A~v{{}UG/w//EQ	Q		 y  &&(>$j)   !&&(>$j)  rs   c                0    D cg c]+  }t         j                  j                  j                  |      - c}t	        ||      fd|dv r:t        ddt        j                              j                          fdfdS  S c c}w )z1Convert inner_fn from a reduction to an pointwisec                     t        j                   fdt        j                  D cg c]  }t	        |       c} D              S c c}w )Nc              3  0   K   | ]  } |        y wr|   rw   )rG  r!  r   value_fns     rr   rI  z=Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>.  s        UF+   )	functoolsreduce	itertoolsproductr   )r   r   
combine_fnr  r  s   ` rr   r   z*Reduction._unroll_reduction_fn.<locals>.fn+  sH    ##"+"3"3,<=q%(=# 
 >s   A
r  r  Nc                    |D cg c]  }t        j                  |       }} | |      t        j                   |      t        j
                        fS c c}w r|   )rh   expandrN   
index_exprrb   int64)r   r!  r   flatten_indexr\  s      rr   r  z0Reduction._unroll_reduction_fn.<locals>.value_fn>  sN    39:a%,,q/::UF+NN=#8%++F  ;s   Ac                     |       d   S Nr2   rw   )r   r   s    rr   r3  z0Reduction._unroll_reduction_fn.<locals>.<lambda>E  s    E1 rs   )	rP   r   r   evaluate_static_shaper  FixedLayoutr   r   r  )	r\  r  r  r  r   r  r  r   r  s	   ``   @@@@rr   _unroll_reduction_fnzReduction._unroll_reduction_fn"  s     @P
:;AGG2215
 .niH
		 11' 112BC	
 ln  .-HIG
s   0Bc
                   t         j                  j                  j                  t	                    }
|
dk(  rifd} |d       |d       |d       |d      dj                         v s
J  d       fd}t        j                  |||t        |            S |
dk(  r(dv rfd	}nfd
}t        j                  |||      S t        |
t        j                        rrt         j                  j                  j                  |
      t        j                  k  r8t	        |      dk7  r*t        j                  || j                  |      |      S | j!                  ||||
|		      \  }}|t"        j$                  k(  r|}|dk(  r4|	J t'        |	      \  }}|J |J | j)                  ||||||
      S |dkD  r| j+                  |||||	      S t,        j                  t/        ||||            S )Nr   c                    t         j                  k(  rt        |       S j                  rt        |       S t	        |       S r|   )rb   r   r  r  rl   )valr  s    rr   py_cnstz!Reduction.create.<locals>.py_cnst]  sF     !EJJ. I !22 s
 Srs   r2   )r  r  r  r  z* not supported for zero-dimension tensors!c                6    t        j                           S r|   rN   r  )r   r  r  rtypes_to_initss    rr   const_fnz"Reduction.create.<locals>.const_fnr  s    ||ON$CYOOrs   r   r   r\  r^  r  c                0    t        j                  d      S r  r  )r   r  s    rr   r   zReduction.create.<locals>.fn  s    <<955rs   c                d    D cg c]  }t        j                  d       }} | |      S c c}w r  rh   r   )r   r   reduction_indexr\  r  s      rr   r   zReduction.create.<locals>.fn  s2    AQ&RAu}}Q'7&RO&R#E?;; 'S   -rV  )rP   r   r   simplifyrL   r  r  r  r^   r]   rh   r   r   r3   unroll_reductions_thresholdr  r  r?   r$  r8   !create_multilayer_existing_rangescreate_multilayerrX   r  )r  r   r  r  r\  r^  r  r  r  r  r  r  r  r   hintr  r  r  r  s     ` ` ``          @rr   r  zReduction.createJ  s    ''**33MBR4STa qz"1:
qz	O /"6"6"88M !!KLM8P ##!F|	 $   a!556
< ##FIr6BB 6  **?;001f%*##((.	   nn

e ]222!NB;)))/R0,J, )))'33388 $  QY(( 
 
  	
 	
rs   c                .   | dv rAt        |      rt        d      S t        |      ryt        j                  |      j
                  S | dv rAt        |      rt        d      S t        |      ryt        j                  |      j                  S ddddddd|    S )	N)r  r  z-infr   )r  r  infr2   r   r   r   )r  r  r  r  welford_reducer  )r!   r  r    rb   iinfor  r  r  r   s     rr   default_accumulatorzReduction.default_accumulator  s    ..e$V}$!%({{5)---..e$U|#!%({{5)--- '(
  	rs   c                :    | dk(  ryt         j                  | |      S )Nr  r   )r  r  r  s     rr   default_valuezReduction.default_value  s!    --,,^UCCrs   c                    | dk(  r|S | dk  r(|dk  r#|t         j                  k(  rt         j                  S | dk  r(|dk  r#|t         j                  k(  rt         j                  S |S )NrV  r+  i      )r?   r  
OUTER_TINY)r  r@  r  s      rr   _multilayer_second_step_hintz&Reduction._multilayer_second_step_hint  sg     B;!!C<J#-.MDWDW2W +++TMc!-"5"55 +++rs   c                   	 t         j                  |g      	t        j                  j                  j                  t        j                  |z  d             	fd}|S )Nr   c                .   |\  }| ^ }|z  |z   	fd}
rqt        j                  t        j                  t        j                        t        j                  t        j                              }t        j
                  ||      S  |       S )Nc                 $       g            S r|   rw   )rj  r  	new_indexr   s   rr   bodyzCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.body  s    i');<<rs   )rN   r  r  rb   int32masked)r   r  reduction_blockr  r  rj  r  
block_sizedefaultr  	need_maskr  r   s        @@rr   
wrapper_fnz5Reduction._multilayer_wrap_loader.<locals>.wrapper_fn  s|    !0_*/'Y ?2_DG= vvNN7EKK8NN?EKK@ zz$g66vrs   )Viewdynamic_reshape_indexerrP   r   r   r  rh   r  )
r  r  r  r  r  r  r  r  r  r   s
    ` ` `` @@rr   _multilayer_wrap_loaderz!Reduction._multilayer_wrap_loader
  s_     ../?/ARS((@@HH_u,a0
 
		 	" rs   c                    t        d D              s
J d       t        j                  |t        |      t        |      z         fd}|S )Nc              3  &   K   | ]	  }|d k(    ywrp  rw   rG  r  s     rr   rI  zDReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>6  s      
AF
rr  z8Only enabled for numel_hint == 1, found original_ranges=c           	         | d t               }| t              d  } | t        |      t        |      z               S r|   )r   r_   )merged_indexnew_reduction_indexoriginal_idxr  r  original_rangesr   s       rr   r  zEReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fn=  sO    '(>#o*>?L$S%9%;<Ii(51D+EEF rs   )rd  r  r  r_   )	r  r  r  original_reduction_rangesr  r  r  r  r   s	    ``     @rr   '_multilayer_wrap_loader_existing_rangesz1Reduction._multilayer_wrap_loader_existing_ranges,  sj      
+
 
 	IFo5GH	I 
 ..%uZ'85AU;V'V
	 rs   c                    |t         j                  t         j                  fvr|nt         j                  }t        j                  |||||||	|      }|j                          |j                         fd}t        j                  j                  j                  t        |            }| j                  |
||      }||dt        |       k(  sJ t        j                  t	        |||||t        |      d |	||            S )a
        Break a large reduction up into multiple smaller reductions
        recursively
        c                     g | |      S r|   rw   )r   r  intermediate_loaders     rr   intermediate_fnz;Reduction.create_multilayer_helper.<locals>.intermediate_fno  s    &'A'A'ABBrs   N)rb   float16bfloat16r  r  r  r  r  rP   r   r   r   rL   r  r   rX   )r  r   r  r  r  r  r  r  r  r  r  r  intermediate_dtypeintermediater  r@  r  s                   @rr   create_multilayer_helperz"Reduction.create_multilayer_helperG  s   0  ??  	
 !'' 	
 	*668	C WW%%//o0NO
99:~
 *-Cs?/C"DDDD3/12	
 	
rs   c
                    t        |      }
t        |
|dz
  z   |      }| j                  ||      }| j                  |||
|||      }| j	                  ||||||g |||g|||	      S )r  r2   )rL   r/   r  r  r  )r  r   r  r  r\  r^  r  r  r  r  r  r  r  r  s                 rr   r  zReduction.create_multilayer  s    $ ((89o;UC
##NI>00&
G

 ++feL
 	
rs   c                    | j                  |	|      }| j                  ||||||      }| j                  ||||||g ||||	d|
      S )r  rV  )r  r  r  )r  r   r  r  r\  r  r  r  r  r  r  r  r  s                rr   r  z+Reduction.create_multilayer_existing_ranges  sx    $ ##NI>@@% 

 ++%+o+
+ 
 	
rs   rT  rW  r|   )r  Optional[IRNode])r   rZ  r  r  r  r  r\  r[  r^  r]  r  r]  r  r   r  r?   r  r  )r  rl   r@  rl   r  r?   ru   r?   )r   rZ  r  r  r  r  r  r[  r  r]  r  r]  r  r]  r  r]  r  r   r  rl   r  r?   )r   rZ  r  r  r  r  r\  r[  r^  r]  r  r]  r  r   r  rl   r  r?   )r   rZ  r  r  r  r  r\  r[  r  r]  r  r]  r  r]  r  r]  r  r   r  r?   )r   r%  r&  r'  rm  r  r"  r  r  r  r  r  rf  r  r(  r  r  r  r?   r$  r  r  r  r  r  r  r  r  r  r  r  s   @rr   r  r     s     !!



%#F=
K

  (,_ %_ _B % %N  )6(=(='+L
L
 L
 	L

 %L
 L
 %L
 L
 &L
 %L
 L
\  2 D D
  #5B	    B  4 ;
;
 ;
 	;

 ';
 $;
 $.;
 ;
 );
 ;
 ;
 &;
 ;
z $
$
 $
 	$

 %$
 $
 %$
 $
 $
 &$
 $
L &
&
 &
 	&

 %&
 $&
 $.&
 &
 )&
 &
 &&
 &
rs   r  c                       e Zd ZU ded<    fdZd Zeej                  f	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z	e
d        Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d       Z xZS )
WelfordReductionrl   output_indexc	           
     t    t              dk(  rd   }	nfd}	t        
| 	  |||	|||||       || _        y )Nr2   r   c                2     t         fdD              S )Nc              3  0   K   | ]  } |        y wr|   rw   )rG  r   r   reduction_idxs     rr   rI  z<WelfordReduction.__init__.<locals>.loader.<locals>.<genexpr>  s     HR]3Hr  )r_   )r   r  	inner_fnss   ``rr   r  z)WelfordReduction.__init__.<locals>.loader  s    HiHHHrs   )r   rp  __init__r  )r   r   r   r  r^  r  r  r  r  r  rr  s      `      rr   r  zWelfordReduction.__init__  sQ     y>Qq\FI 			
 )rs   c           	         t        j                  | j                  | j                  | j                  | j                  ||            }|| j                     }t        j                  | ||      |      S r|   )rN   r  r   r  r  r\  r  r  )r   r  r  r  r  ra   r   s          rr   r  z WelfordReduction.store_reduction  s`    JJNNMM$/	
 t(()"";uEErs   c                   |dv sJ t         j                  j                  j                  t	                    }fd}	|dk(  r |	d      }
 |	d      } |	d      }|
||fS |dk(  r;fd|dk(  r |d          |	d       |	d      fS t        fd|D              S t        j                  |d   ||      \  }}|t        j                  k(  r|}|dkD  r| j                  ||||      S t        d	      D cg c]'  }t        j                  t        ||||            ) }}|D ]  }|j                           |S c c}w )
N)r  r  c                V      fd}t         j                  |t                    S )Nc                0    t        j                        S r|   r  )r   r   r  s    rr   r\  z8WelfordReduction.create.<locals>.const.<locals>.inner_fn  s    || rs   r  r  r  r^   )r  r\  r   r   r^  s   ` rr   constz&WelfordReduction.create.<locals>.const  s2     ##!F|	 $  rs   r   r2   c                V      fd}t         j                  |t                    S )Nc                d    D cg c]  }t        j                  d       }} | |      S c c}w r  r  )r   r   r  r  r  s      rr   r\  z7WelfordReduction.create.<locals>.copy.<locals>.inner_fn'  s2    AQ&RAu}}Q'7&RO&R!#77 'Sr  r  r  )r  r\  r   r   r^  r  s   ` rr   copyz%WelfordReduction.create.<locals>.copy&  s2    8 !''!%<	 (  rs   r  c              3  .   K   | ]  } |        y wr|   rw   )rG  r   r  s     rr   rI  z*WelfordReduction.create.<locals>.<genexpr>5  s     :"T"X:s   )r  r  r   )rP   r   r   r  rL   r_   r  r  r?   r$  r  r   rX   r  r  r  )r  r   r   r  r^  r  r  r  r  r  meanm2weightr  r  
output_idxresultsr   r  s    `` ``            @rr   r  zWelfordReduction.create  s    !FFFF''**33MBR4ST	 a8DqB1XFV##a
 !11IaL)58U1X==:	:::&  **aL)+ + 	
e ]222!N19(( 	 	0 $Ah
   $""	
 
  	AIIK	#
s   	,Ec                     y)Nr  rw   r  s     rr   r  zWelfordReduction.default_valuew  s    rs   c	                    t              t        j                  j                  j	                  t        j                  z  d             }	|	r?|dk7  r:fd}
 j                  ||d   t        |
d      t        |
d      f|d|      S t        dz
  z         t        j                  |t         fd|D              g |g||      }|D ]  }|j                           |D cg c]  }|j                          }}d t        j                  j                  j                  t        |            } j!                  ||      }t        j                  |t        fd	|D              |gd|      S c c}w )
r  r   r  c                0    t        j                  |      S r|   r  )r   r  r   r   s      rr   r  z4WelfordReduction.create_multilayer.<locals>.constant  s    ||E511rs   r   r2   )r   r   r  r^  r  r  r  r  c           	   3  L   K   | ]  }j                  |d         yw)r   )r  N)r  )rG  r  r  r  r  r  r  s     rr   rI  z5WelfordReduction.create_multilayer.<locals>.<genexpr>  s=      
  ++$# , 
s   !$c                     |g | |      S r|   rw   )r   r  r  s      rr   intermediate_loader_fnzBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fn  s    4E4O455rs   c              3  T   K   | ]  }t        |j                                 ! yw))r  N)r   r  )rG  r   r
  s     rr   rI  z5WelfordReduction.create_multilayer.<locals>.<genexpr>  s*       .q}}GG   %()rL   rP   r   r   r  rh   r  r  r   r/   r  r  r_   r  r  r   r  )r  r   r   r  r^  r  r  r  r  r  r  intermediatesr   	i_loadersr@  r  r
  r  s   ` `  ` `       @@@rr   r  z"WelfordReduction.create_multilayer{  s     ((89((@@HH_u,a0
 
	 +<<2 ((aLHA.HA.
 !10- )   o;UC
(// 
 (
 
 feL#
&  	AIIK	 /<<Q]]_<	<	6 WW%%//f0EF
99:~
  && &  G
 	
 =s   8F)r   rZ  r   r  r  Sequence[Callable[..., Any]]r^  r]  r  r]  r  r   r  r?   )r   rZ  r   r  r  r  r^  r]  r  r]  r  r   r  rl   r  r?   )r   r%  r&  r'  r  r  r  r?   r$  r  r(  r  r  r  r  s   @rr   r  r    s   )<F  )6(=(=ss s 0	s
 s %s s &s sj   V
V
 V
 0	V

 V
 %V
 V
 V
 &V
 V
rs   r  c                  0    e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   d fdZ fdZd Zd Zd Zd Z	d Z
d Zd Zd Zeej                   fdd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d d       Z xZS )!Scanr]  scan_rangesr   =Callable[[Tuple[Any, ...], Tuple[Any, ...]], Tuple[Any, ...]]r  .Callable[[List[Expr], List[Expr]], List[Expr]]r   r?   r  rl   r  Tuple[torch.dtype, ...]dtypesTuple[Callable[..., Any], ...]r  c                    t         |           t               j                  d | j                  D         z   t               j                  d | j
                  D         z  S )Nc              3  2   K   | ]  }t        |        y wr|   ra  rb  s     rr   rI  z0Scan.get_unbacked_symbol_uses.<locals>.<genexpr>       "V#8#;"Vrd  c              3  2   K   | ]  }t        |        y wr|   ra  rb  s     rr   rI  z0Scan.get_unbacked_symbol_uses.<locals>.<genexpr>       "O#8#;"Ord  )rp  r"  r-   re  r  r   rq  s    rr   r"  zScan.get_unbacked_symbol_uses  s[    
 G,. jl  "VTEUEU"VWX jl  "OTYY"OPQ	
rs   c                    t        | j                        t        | j                        z   t        | j                        k(  sJ t        |           y r|   )r   r^  r  r   rp  r   rq  s    rr   r   zScan.__post_init__  =    4;;#d&6&6"773tyy>IIIrs   c                   | j                  ||      }| j                  D cg c]
  } ||       }}t        j                  | j                  | j
                  |      }t        j                  | ||      || j                           S c c}w r|   )r   r  rN   scanr  r  r  r  )	r   r  r  r  	scan_varsr   r\  ra   results	            rr   r  zScan.store_reduction  so    ll4+04?H(3-??$++t?yygclF4;L;L4MNN @s   Bc                     y)Ncustomrw   r   s    rr   r  zScan.get_reduction_type  s    rs   c                    | j                   S r|   )r  r   s    rr   r  zScan.get_reduction_size  ry  rs   c                    | j                   S r|   r   r   s    rr   r   zScan.get_size      yyrs   c                    | j                   S r|   r{  r   s    rr   r}  zScan.get_pointwise_size  rv  rs   c                X    t        | j                        t        | j                        z   S r|   )r   r^  r  r   s    rr   r  zScan.index_length  !    4;;#d&6&6"777rs   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }|fS r|   )r  r^  r  r1   r  r   r   r   r!  r   s       rr   r  zScan.inner_fn_args  E    DKK(T--t{{;ll5&)vrs   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }t        | j                  |      S r|   )r  r^  r  r1   r  r   r7   r\  r-  s       rr   rf  z#Scan.inner_fn_free_unbacked_symbols  O    DKK(T--t{{;ll5&),T]]C@@rs   T)can_fallback_to_atenc               |   g |d  |dz   d  |   gt         j                  j                  |t        j                        sd gt        |      z  S t        |      dkD  r=t         j                  j                  |t        j                        sd gt        |      z  S t         j                  j                  }
|
j                  t                    }t        |      t        |      k(  sJ |
j                  t        j                  |d            r?t        t        |            D cg c]!  }t        j                  |||   ||   |      # c}S | j!                  ||d   |d   ||      \  }}t"        }|dkD  rHt$        j&                  j(                  d u xr t        |      dk(  }|s|rd gt        |      z  S d}nt*        }fd}t        t        |            D cg c]0  }t,        j                   |d|||   |||   ||||||d|	      2 }}|D ]  }|j/                           |S c c}w c c}w )Nr2   r  r   )r   r   r\  axispointwise_rangesr  r  
scan_numelc                    t        |      t              k(  sJ t        |       t              k(  sJ g | d  || d  S r|   r   )r   
scan_indexr3  r4  r  s     rr   r   zScan.create.<locals>.reindexT  S    z?c+&6666u:%5!6666>U5D\>J>tu>>rs   )r   r   r  r\  r  r   r^  r  r  r   r  r  rw   )rP   r   rt  r5   SCANr   TUPLE_REDUCTIONr   r  rL   r  rh   Ler   r  r  r  r  rb   versionhip	SplitScanrX   r  )r  r   r  r  r   r3  r  r  r1  r   r   r5  r  r  	scan_typesupports_splitr   r  r"  r4  r  s        `             @@rr   r  zScan.create  sq    =T%4[<4q
+;<Dzlww""6>+>+>?6CK''v;?177#6#6N22$
 6CK''77##&&}['AB
6{c)n,,, ++EHHZ,CD %*#f+$6 !   ! .&|4	 !   &)^^)q\-#!! &4 	&
"
 	>"]]..$6K3v;!;KN!' 6CK//!"J%		?. !&c&k 2%
$ #  ! .!&|4'+ +)##1!- 
 
*  	FNN	 {J
s   )&H4$5H9c	           
     L    fd}	t         j                  ||||	||d|      S )Nc                ,     g | d  || d        S r|   rw   )r   r  r3  r\  s     rr   r  z#Scan.num_splits.<locals>.wrapper_fn  s*    Fc%4jF=F3tu:FGGrs   r  )r   r  r  r\  r^  r  r  r  )r  r  )
r  r   r   r\  r3  r4  r  r  r5  r  s
      ``     rr   r  zScan.num_splitss  s;    	H ###( & $ 	
 		
rs   rW  )r   rZ  r  r  r  'Tuple[Callable[[List[Expr]], Any], ...]r   r]  r3  rl   r  r  r  r?   r1  r   ru   List[Optional[TensorBox]])r   rZ  r   r  r\  zCallable[[List[Expr]], Any]r3  rl   r4  r]  r  r]  r  r  r5  r   )r   r%  r&  r'  r"  r   r  r  r  r   r}  r  r  rf  r  r?   r$  r  r  r  r  s   @rr   r  r    sY   
MM;;!!##--
 O 8A  )6(=(=] &*]] (] ;	]
 ] ] R] &] #] 
#] ]~ 

 
 .	

 
 %
  
 R
 
 
rs   r  c                      e Zd Zy)r?  N)r   r%  r&  rw   rs   rr   r?  r?    s    rs   r?  c                      e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   d fdZ fdZd Zd Zd Zd Z	d Z
d Zd Zd Zeej                   f	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z xZS )Sortr]  sort_rangesr   r  r   r?   r  rl   r  r  r  r  r  r   stable
descendingc                    t         |           t               j                  d | j                  D         z   t               j                  d | j
                  D         z  S )Nc              3  2   K   | ]  }t        |        y wr|   ra  rb  s     rr   rI  z0Sort.get_unbacked_symbol_uses.<locals>.<genexpr>  r  rd  c              3  2   K   | ]  }t        |        y wr|   ra  rb  s     rr   rI  z0Sort.get_unbacked_symbol_uses.<locals>.<genexpr>  r  rd  )rp  r"  r-   re  rI  r   rq  s    rr   r"  zSort.get_unbacked_symbol_uses  sY    G,. jl  "VTEUEU"VWX jl  "OTYY"OPQ	
rs   c                    t        | j                        t        | j                        z   t        | j                        k(  sJ t        |           y r|   )r   r^  rI  r   rp  r   rq  s    rr   r   zSort.__post_init__  r  rs   c                .   | j                  ||      }| j                  D cg c]
  } ||       }}t        j                  | j                  || j
                  | j                        }t        j                  | ||      || j                           S c c}w r|   )	r   r  rN   sortr  rJ  rK  r  r  )	r   r  r  r  	sort_varsr   r\  ra   r"  s	            rr   r  zSort.store_reduction  su    ll4+04?H(3-??$++vt{{DOOLyygclF4;L;L4MNN @s   Bc                     y)NrQ  rw   r   s    rr   r  zSort.get_reduction_type  s    rs   c                    | j                   S r|   )rI  r   s    rr   r  zSort.get_reduction_size  ry  rs   c                    | j                   S r|   r'  r   s    rr   r   zSort.get_size  r(  rs   c                    | j                   S r|   r{  r   s    rr   r}  zSort.get_pointwise_size  rv  rs   c                X    t        | j                        t        | j                        z   S r|   )r   r^  rI  r   s    rr   r  zSort.index_length  r+  rs   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }|fS r|   )r  r^  rI  r1   r  r   r-  s       rr   r  zSort.inner_fn_args  r.  rs   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }t        | j                  |      S r|   )r  r^  rI  r1   r  r   r7   r\  r-  s       rr   rf  z#Sort.inner_fn_free_unbacked_symbols  r0  rs   c	                   g |d  |dz   d  |   gt         j                  j                  |t        j                        sd gt        |      z  S t         j                  j                  }
|
j                  t                    }d}t        j                  j                  xr% |
j                  t        j                  ||            }|sd gt        |      z  S t        |      t        |      k(  sJ |
j                  t        j                  |d            r?t        t        |            D cg c]!  }t         j#                  |||   ||   |      # c}S fd}t        t        |            D cg c]4  }t$        j#                  t'        d|||   |||   |||||||d|	      6 }}|D ]  }|j)                           |S c c}w c c}w )Nr2   r+  r  c                    t        |      t              k(  sJ t        |       t              k(  sJ g | d  || d  S r|   r7  )r   
sort_indexr3  r4  rI  s     rr   r   zSort.create.<locals>.reindex  r9  rs   )r   r   r  r\  r  r   r^  rI  r   r  r  rJ  rK  rw   )rP   r   rt  r5   SORTr   r   r  rL   r3   tritonpersistent_reductionsr  rh   r<  r   r  r  rX   rH  r  )r  r   r  r  r   r3  rJ  rK  r  r   r   
sort_numel
max_rblockis_persistent_kernelr  r   r  r"  r4  rI  s        `            @@rr   r  zSort.create  s    =T%4[<4q
+;<Dzlww""6>+>+>?6CK''77##&&}['AB
 
MM// S00*j1QR 	 $6CK''6{c)n,,, ++EHHZ,CD %*#f+$6 !   ! .&|4	 !  	?0 !&c&k 2'
& %  ! .!&|4'+ +##1!-!) 
 
,  	FNN	 Q
s   2&G89GrW  )r   rZ  r  r  r  rD  r   r]  r3  rl   rJ  r   rK  r   r  r?   ru   rE  )r   r%  r&  r'  r"  r   r  r  r  r   r}  r  r  rf  r  r?   r$  r  r  r  s   @rr   rH  rH    s     
;;!!##--L
 O 8A  )6(=(=LL (L ;	L
 L L L L &L 
#L Lrs   rH  c                >    	 t        | d       y# t        $ r Y yw xY w)NFfreezeT)as_storage_and_layoutr   r   s    rr   r   r   "  s&    a. s    	c                    	 t        | d      \  }}|j                         r|j                          |j                         S # t        $ r Y yw xY wNFrd  )rf  should_pad_stridespad_stridesis_contiguousr   )r   bufferrZ  s      rr    is_contiguous_storage_and_layoutrm  *  sR    .q? $$& ##%% s   ?A 	AAc                   t        | t              rt        | j                  |||||      S t        | t              rt        | j                  t
              r|r|r@| j                  j                          | j                  j                  j                         s\J || j                  j                  ||       n:|| j                  j                  ||       n| j                  j                          | | j                  j                  fS t        | t              r(t        | j                  |      \  }}|| j                  fS t        )z
    Try to simplify x into a StorageBox and a Layout.

    allow_padding only affect how we apply stride_order. When allow_padding
    is True, we have the freedom to add padding when applying the stride_order.
    re  want_contiguousstride_orderallow_paddingexact_stridesrr  rd  )r]   rX   rf  r[  
StorageBoxBufferfreeze_layoutrZ  rk  freeze_layout_with_stride_order freeze_layout_with_exact_stridesrg  ReinterpretViewr   )r   re  rp  rq  rr  rs  rl  r   s           rr   rf  rf  6  s     !Y$FF+%''
 	
 !Z Z%?$$&vv}}22444)66  7  *77! 8  $$&!&&--!_% *FF
	 qxx
rs   )rp  c                d    	 t        | d      \  }}|j                  |      S # t        $ r Y yw xY wrh  )rf  is_stride_orderedr   )r   rq  rl  rZ  s       rr   "is_stride_order_storage_and_layoutr}  m  s:    .q?''55 s    # 	//c                      e Zd ZU ded<   d Zd Zd Zd Zed        Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd ZddZd Zd Zd Zy)BaseViewrY   r[  c                6    | j                   j                         S r|   r[  r"  r   s    rr   r"  z!BaseView.get_unbacked_symbol_uses{      yy1133rs   c                    t        d|        )Nzmake_reindexer NYI on r   r   s    rr   make_reindexerzBaseView.make_reindexer~  s    !$:4&"ABBrs   c                j    | j                   j                         | j                         fd}|S )Nc                       |             S r|   rw   r   innerr   s    rr   r  z&BaseView.make_indexer.<locals>.indexer      &&rs   )r[  r  r  )r   r  r  r   s     @@rr   r  zBaseView.make_indexer  s/    		&&(%%'	' rs   c                j    | j                   j                         | j                         fd}|S )Nc                       |             S r|   rw   r  s    rr   r  z$BaseView.make_loader.<locals>.loader  r  rs   )r[  r  r  )r   r  r  r   s     @@rr   r  zBaseView.make_loader  s/    		%%'%%'	' rs   c                .    | j                   j                  S r|   r[  r   r   s    rr   r   zBaseView.dtype      yyrs   c                6    | j                   j                         S r|   r[  r   r   s    rr   r   zBaseView.get_layout      yy##%%rs   c                6    | j                   j                         S r|   )r[  r   r   s    rr   r   zBaseView.get_device  r  rs   c                     y r|   rw   r   s    rr   r5  zBaseView.get_origin_node  r  rs   c                6    | j                   j                         S r|   r[  r  r   s    rr   r  zBaseView.get_name      yy!!##rs   c                "    | j                         S r|   r  r   s    rr   r}  zBaseView.get_pointwise_size      }}rs   c                8    | j                   j                  |      S r|   )r[  r   r   userss     rr   r   zBaseView.mark_reuse  s    yy##E**rs   c                6    | j                   j                         S r|   )r[  r  r   s    rr   r  zBaseView.has_exceeded_max_reads  s    yy//11rs   c                6    | j                   j                         S r|   r[  r  r   s    rr   r  zBaseView.realize      yy  ""rs   c                6    | j                   j                         S r|   )r[  r!  r   s    rr   r!  zBaseView.realize_hint  s    yy%%''rs   c                6    | j                   j                         S r|   )r[  r  r   s    rr   r  zBaseView.get_storage_numel  s    yy**,,rs   c                6    | j                   j                         S r|   )r[  r<  r   s    rr   r<  zBaseView.is_extern      yy""$$rs   c                6    | j                   j                         S r|   )r[  is_module_bufferr   s    rr   r  zBaseView.is_module_buffer      yy))++rs   c                6    | j                   j                         S r|   r[  r   r   s    rr   r   zBaseView.get_read_names      yy''))rs   c                    t        j                  t        dd      5  t        | j	                         | j                               j                  cd d d        S # 1 sw Y   y xY wr  )r   r   r   r9   r  r   rK  r   s    rr   r  zBaseView.get_reads  sL    \\.*:DA 	&  " e		 	 	s   2AA!c                d    | }t        |t              r|j                  }t        |t              r|S r|   )r]   r  r[  )r   r   s     rr   unwrap_viewzBaseView.unwrap_view  s+    H%A H%rs   c                    | j                         } t        j                  t        d|      |      }t	        || j                         || j                               S r  )r  r   r   r  r  r   r   r  s      rr   r  zBaseView.constant_to_device  sH    !!#Hn.?HP!164==?KKrs   Nr#  )r   r%  r&  r'  r"  r  r  r  r+  r   r   r   r5  r  r}  r   r  r  r!  r  r<  r  r   r  r  r  rw   rs   rr   r  r  w  s    
L4C  &&$+2#(-%,*Lrs   r  c                  D    e Zd ZU ded<   ed        Zed        Zd Zd Z	y)re   r]  r   c                   t         j                  j                  }t        t	        t
        j                  |            }| j                         }dgt        |      t        |      z
  z  t        |      z   }t        |      t        |      k(  sJ t        t        |            D ]N  }||   dk(  r||   J ||   ||<   ||   ||   dk(  r)|j                  ||   ||   z
  d      dk(  rIJ d        |S )zReplace `-1` with correct sizesNrV  r2   r   r  zKBroadcast failed in ExpandView({x.get_size()}, {new_size}) on dimension {i})rP   r   r   r^   r   rh   r  r   r   r   r   )r   new_sizer   old_sizer   s        rr   _normalize_sizezExpandView._normalize_size  s    77##ELL(34::<6S]S]:;d8nL8}H---s8}% 	aA{b {...&qk!$q(8 &&x{Xa['@1&MQRRa`aR	a rs   c                   | j                  ||      }t        |      rt        |      \  }}t        |      t        |j                        z
  }|dk\  sJ t        j                  d      g|z  }t        |j                  |j                        D ]0  \  }}|j                  |dk7  r|nt        j                  d             2 t        |j                  |j                  t        |      ||j                        }	t        ||	      S t!        ||      S Nr   r2   )r  r   rf  r   r   rh   r   r   r   re  r  r   r   r^   offsetrz  re   )
r  r   r  storage
old_layoutskip
new_strider   r   
new_layouts
             rr   r  zExpandView.create  s    &&q(3 #"7":GZx=3z#77D199--*+d2J #J$5$5z G M!!DAI&5==;KLM$!!  X!!J #7J77!X&&rs   c                    | j                   S r|   r'  r   s    rr   r   zExpandView.get_size	  r(  rs   c                    | j                         }| j                  j                         t        |      t              z
  fd}|S )Nc                    t        | d        } t        |       t              k(  sJ t        t                    D ]#  }|   dk(  st        j                  d      | |<   % | S r  )r^   r   r   rh   r   )r   r   actualr  s     rr   r   z*ExpandView.make_reindexer.<locals>.reindex	  se    tu&Eu:V,,,3v;' 0!9>$}}Q/E!H0 Lrs   )r   r[  r   )r   targetr   r  r  s      @@rr   r  zExpandView.make_reindexer	  s>    ##%6{S[(	 rs   N)
r   r%  r&  r'  r(  r  r  r  r   r  rw   rs   rr   re   re     s8    
 0 ' '*rs   re   c                  D    e Zd ZU ded<   ed        Zed        Zd Zd Zy)PermuteViewr]  dimsc           
        | j                  |      }t        |      t        t        t        |                  k(  sJ t	        |      r}t        |      \  }}t        |j                  |j                  |D cg c]  }|j                  |    c}|D cg c]  }|j                  |    c}|j                        }t        ||      S t        ||      S c c}w c c}w r|   )_map_neg_dimsr-   r   r   r   rf  r  r   r   r   r   r  rz  r  )r  r   r  r  r  r   r  s          rr   r  zPermuteView.create	  s      &$:eCI.>#???? #"7":GZ$!!  -12#2/34!""1%4!!J #7J771d## 34s   5CCc                R    |D cg c]  }|dk\  r|nt        |      |z    c}S c c}w r  r7  )r  r  r	  s      rr   r  zPermuteView._map_neg_dims/	  s)    @DEsaxSY_4EEEs   $c                   t        | j                  | j                              t        t        t	        | j                                    k(  sJ | j
                  j                         }| j                  D cg c]  }||   	 c}S c c}w r|   )r-   r  r  r   r   r[  r   )r   r   r   s      rr   r   zPermuteView.get_size3	  so    $,,TYY78J#dii.!=
 
 	
 
 yy!!#!%+AQ+++s   7Bc                B   t        | j                        D ci c]  \  }}||
 c}}t        t        | j                              D cg c]  }|   	 c}t	              t	        t        t        | j                                    k(  sJ fd}|S c c}}w c c}w )Nc                4    D cg c]  }| |   	 c}S c c}w r|   rw   )r   r   invs     rr   r   z+PermuteView.make_reindexer.<locals>.reindex?	  s    &)*E!H***s   )r   r  r   r   r-   )r   r   jr   r  s       @rr   r  zPermuteView.make_reindexer:	  s}     )$)) 451q!t5$S^45!s1v5#*U3tyy>-B"CCCC	+  65s   BBN)	r   r%  r&  r'  r  r  r  r   r  rw   rs   rr   r  r  	  s:    
$ $" F F,rs   r  c                  :    e Zd Zeddd       Zedd       Zd Zy)SqueezeViewN)r	  c          	        t        |      rt        |      \  }}g }g }|6t        |t              sJ d       d|k  r|t	        |j
                        k  sJ t        t        |j
                  |j                              D ]g  \  }\  }}	|)|dk7  s|j                  |       |j                  |	       4||k7  r#|j                  |       |j                  |	       \|dk(  rbJ d        t        |j                  |j                  |||j                        }
t        ||
      S |8t        j!                  ||j#                         D cg c]
  }|dk7  s	| c}      S |j#                         |   dk(  sJ t        j!                  |t        |j#                               D cg c]  \  }}||k7  s| c}}      S c c}w c c}}w )Nzexpected integer dim argumentr   r2   zexpected squeezed size to be 1)r   rf  r]   rl   r   r   r   r   r   re  r  r   r   r  rz  r  r  r   )r  r   r	  r  r  r  r  r   r   r   r  r   s               rr   r  zSqueezeView.createF	  s    #"7":GZHJ!#s+L-LL+CxC#joo*>$>>>%.s:??JDUDU/V%W 
K!>D&;qy -"))&1Cx -"))&1#qyJ*JJy
K %!!  !!J #7J77;;;qajjl"Ea1f1"EFF::<$)));;q1::<1H"UAAQTH1"UVV #F #Vs   
F;
F;
!G /G c                    | D cg c]
  }|dk7  s	| }}t        |       D cg c]  \  }}|dk7  s| c}}t        |       dfd}||fS c c}w c c}}w )Nr2   c                    t        |       t              k(  sJ |  d        t        j                  d      gz  }t        |       D ]
  \  }}|||<    t	        |      S )N r   )r   rh   r   r   r_   )r   r  r   r   lengthnot_ones       rr   r   z%SqueezeView.squeezer.<locals>.reindexr	  sm    u:W-C%'/CC-q)*V3Igu- #Q!"	####rs   )r   zList[sympy.Expr]ru   Tuple[sympy.Expr, ...])r   r   )r   r   r  r   r   r  r  s        @@rr   squeezerzSqueezeView.squeezerl	  s]    #.!qAvA..!*4;AAF1;T	$    /;s   
AAAAc                    t        d      )Nzuse SqueezeView.create())AssertionError)r   r[  s     rr   r  zSqueezeView.__init__{	  s    788rs   )r   r  )r   r%  r&  r  r  r(  r  r  rw   rs   rr   r  r  E	  s3    " #W #WJ ! !9rs   r  c                  P    e Zd ZU ded<   ded<   d Zd ZddZeZed        Z	d	 Z
y
)GenericViewr]  r   r[  r   c                    | j                   S r|   )r   r   s    rr   r  zGenericView.make_reindexer	      ||rs   c                   t        t        | j                              D cg c]  }t        t        j
                  |       }}t        | j                  |            }ddj                  t        t        |             d| S c c}w )Nzlambda , : )r   r   r   rK   r1   r  r^   r   r   r   r   )r   r  	index_old	index_news       rr   reindex_strzGenericView.reindex_str	  ss    CHTYYCX
>?*4::q9
	 
 i01	3sI#6789+FF	
s   !Bc                z    | j                  | j                  d| j                   d| j                          g      S )Nsize=zreindex=)r   r[  r   r  r   s    rr   rm  zGenericView.__str__	  s=    YY%		{+x8H8H8J7K-LM
 	
rs   c                (     | |t        |      |      S r|   )r^   )r  r   r  r   s       rr   r  zGenericView.create	  s    1d8ng..rs   c                    | j                   S r|   r'  r   s    rr   r   zGenericView.get_size	  r(  rs   NrT  )r   r%  r&  r'  r  r  rm  r  r  r  r   rw   rs   rr   r  r  	  s<    
G

 H/ /rs   r  c                  \    e Zd Zed        Zed        Zed        Zed        Zed        Z	y)r  c                    t        j                  |       } t        j                  |      }t        j                  j                  j
                  j                  } |t        j                  | d            r| |z   } | S r  )rh   r  rP   r   r   r   evaluate_exprLt)r   r   r  s      rr   handle_negative_indexzView.handle_negative_index	  sZ    ll3||D!((22@@#q)**C
rs   c                  	 t        |t        t        f      sJ | j                  |j	                         |      \  	}t
        j                  j                  j                  	|      r|S d}t        t        	            dkD  st        t        |            dkD  rd}d|v r	fd} | |t        |      |      S t        |      s|r||r t        |      st        j                  |      }t        |      \  }}t        |j                   |j"                  |t$        j'                  |      |j(                        }t+        ||      S | j-                  	|      } | |t        |      |      S )NFr   Tc                4    t        dgt              z        S r  )r_   r   )r   r  s    rr   fake_reindexz!View.create.<locals>.fake_reindex	  s    aS3x=011rs   )r]   r_   r^   resolve_negative_sizer   rP   r   r   statically_known_list_equalsr   r(   rm  ExternKernelrealize_input as_contiguous_storage_and_layoutr  r   r   r   r   r  rz  r  )
r  r   r  unbacked_symbols_in_sizesr  r  r  r  r   r  s
            @rr   r  zView.create	  sJ   (UDM222 66qzz|XN( 77888LH$)!%h/014(23a7(,%=2 q$x.,77-a04M(2RST2U !..q1"B1"EGZ$!!  11(;!!J #7J77--hA1d8ng..rs   c                <   |D cg c]+  }t         j                  j                  j                  |      - }}| D cg c]+  }t         j                  j                  j                  |      - } }t	        |      }t        t        |            D ]E  }||   dk(  st        j                  d      ||<   t        t        |       t        |            ||<    n t         j                  j                  j                  t        |       t        |             | |fS c c}w c c}w )NrV  r2   )rP   r   r   r  r^   r   r   rh   r   r.   rL   guard_equals)r  r  r   r   s       rr   r  zView.resolve_negative_size	  s    :BCQAGG$$--a0CC:BCQAGG$$--a0CC>s8}% 	A{b #mmA.&}X'>h@WX		 	
%%mH&=}X?VW!! DCs
   0D0Dc                    	 | j                  ||      }|S # t        t        f$ r@ t        |      g}| j                  ||      }| j                  ||      }t	        ||      }Y |S w xY wr|   )_dynamic_reshape_indexerr  
IndexErrorrL   r   )r  r  r  r   flatr   r   s          rr   r  zView.dynamic_reshape_indexer	  sx    	:228XFG  
+ 	:!(+,D33HdCH33D(CH%h9G	:s    AA%$A%c                (   t         j                  j                  j                  }t	        t        |            D cg c]  }t        t        j                  |       c}t        t        |            }t        |       }g |r3|r0|j                         }|j                         \  }}|dk(  r9j                  t        j                  d             |j                  ||f       n|dk(  r|j                  |       n ||       ||      k(  r=j                  |       t         j                  j                  j                  ||       nc ||       ||      k  r ||       ||      k  r2|j                         \  }	}
|	|z  |z   }||
z  } ||       ||      k  r2j                  |       t         j                  j                  j                  ||       n ||       ||      kD  rt        j                  d      }|}j                  t!        |||             ||z  } ||       ||      kD  rH|j                         }j                  t!        |||             ||z  }||z  } ||       ||      kD  rHt         j                  j                  j                  ||       nt"        |r|r0|ra|j                         }t         j                  j                  j                  |d       j                  t        j                  d             |ra|r@|j                         \  }}t         j                  j                  j                  |d       |r@j%                          t              t        |       k(  sJ fd}|S c c}w )zG
        Perform a reshape entirely by modifying indexing math
        r2   r   c                    t        |       t              k(  sJ t        |       t              f       t        t        |             t        fdD              S )Nc              3  6   K   | ]  }t        |        y wr|   )rM   )rG  r   replacementss     rr   rI  zAView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>,
  s     HA|4Hs   )r   r`   r   r_   )r   r  r  	view_exprs    @rr   r   z.View._dynamic_reshape_indexer.<locals>.reindex)
  sM    u:T*CSZT,CC*D% 01LHiHHHrs   )rP   r   r   r   r   r   rK   r1   VIEWr^   r   r  re  rh   r   r  r0   r  reverse)r  r  r   r   	stack_new	stack_oldsize_oldvarsize_newvar2	size_new2divisormodulusr   r  r  s                 @@rr   r  zView._dynamic_reshape_indexer	  s   
 GG$$..	 CHHBV
=>*499a8
 T8,-	N		I }}H%MMOMC1}  q!12  #x1Q  *8$	((;;  %  --hA8$y'::)Ih,??&/mmoOD)/C/C')3H  )Ih,??   %  --hA8$y'::--*"  gw!GH!G+)Ih,??'mmoG$$_S'7%KL%/G''1H	  )Ih,??
   --hA$$= I@  }}HGG))(A6U]]1-. 
 %MMOMCGG))(A6  	9~X...	I
 s
s   !NN)
r   r%  r&  r(  r  r  r  r  r  r  rw   rs   rr   r  r  	  sg      '/ '/R " " 	 	 @ @rs   r  c                       e Zd ZU dZded<    fdZddZeZd Zd Z	d Z
ed	        Zd
 Zd Zd Zd Zd Zd ZddZddZd Z xZS )rz  z*Pretend our storage has a different layoutLayoutrZ  c                    t         |           t        | j                  t              r | j                  j                         | _        y y r|   )rp  r   r]   r[  r  r  rq  s    rr   r   zReinterpretView.__post_init__7
  s5    dii*		--/DI +rs   c                P    | j                  | j                  | j                  g      S r|   )r   r[  rZ  r   s    rr   rm  zReinterpretView.__str__<
  s&    		
 	
rs   c                6    | j                   j                         S r|   r  r   s    rr   r  zReinterpretView.get_nameF
  r  rs   c                .    | j                   j                  S r|   rZ  r   r   s    rr   r   zReinterpretView.get_deviceI
      {{!!!rs   c                     y r|   rw   r   s    rr   r5  zReinterpretView.get_origin_nodeL
  r  rs   c                .    | j                   j                  S r|   )rZ  r   r   s    rr   r   zReinterpretView.dtypeO
  s    {{   rs   c                @    t        | j                  j                        S r|   r^   rZ  r   r   s    rr   r   zReinterpretView.get_sizeS
      DKK$$%%rs   c                @    t        | j                  j                        S r|   r^   rZ  r   r   s    rr   r  zReinterpretView.get_strideV
      DKK&&''rs   c                      fd}|S )Nc                T   j                   j                         }t        j                  j	                          ||             }j                   j
                  j                  j
                  k7  r5t        j                  |j
                  j                  j
                        S |S r|   )rZ  r  rN   loadr  r   r[  to_dtype_bitcast)r   r  
tmp_loaderr   s      rr   r  z+ReinterpretView.make_loader.<locals>.loaderZ
  sp    kk..0G$--/75>BJ{{  DIIOO3++J

DIIOOTT!!rs   rw   r   r  s   ` rr   r  zReinterpretView.make_loaderY
  s    	" rs   c                6    | j                   j                         S r|   rZ  r  r   s    rr   r  zReinterpretView.make_indexerd
      {{''))rs   c                    | j                   S r|   rZ  r   s    rr   r   zReinterpretView.get_layoutg
  rv  rs   c                     y r|   rw   r   s    rr   rw  zReinterpretView.freeze_layoutj
      rs   c                    t        | j                  j                        t        | j                  j                        z  t        | j                  j                        z  S r|   )r(   rZ  r   r   r  r   s    rr   r"  z(ReinterpretView.get_unbacked_symbol_usesm
  sG    !$++"2"23#DKK$6$678#DKK$6$678	
rs   c                   t         j                  j                  j                  | j                  | j
                  j                  | j
                  j                  | j
                  j                  || j
                  j                        S r  )
rP   r   wrapper_codecodegen_reinterpret_viewr[  rZ  r   r   r  r   r  s     rr   r  z!ReinterpretView.codegen_referencet
  sd     ww##<<IIKKKKKK++## = 
 	
rs   c                     yr  rw   r   s    rr   r  zReinterpretView.num_reads
      rs   rT  rW  r|   )r   r%  r&  __doc__r'  r   rm  r  r  r   r5  r+  r   r   r  r  r  r   rw  r"  r  r  r  r  s   @rr   rz  rz  1
  sl    4N0

 H$" ! !&(	*

rs   rz  c                  T    e Zd ZU dZded<   ed        Zd
dZeZe	d        Z
d Zd Zy	)	DtypeViewz(Pretend our storage has a different typer  target_dtypec                    t        |      rQt        |      \  }}t        |j                  ||j                  |j
                  |j                        }t        ||      S t        ||      S r|   )	r   rf  r  r   r   r   r  rz  r.  )r  r   	new_dtyper  r  r  s         rr   r  zDtypeView.create
  sd     #"7":GZ$!!!!!!J #7J77I&&rs   c                P    | j                  | j                  | j                  g      S r|   )r   r[  r/  r   s    rr   rm  zDtypeView.__str__
  s     		4+<+<=>>rs   c                    | j                   S r|   )r/  r   s    rr   r   zDtypeView.dtype
  s       rs   c                6    | j                   j                         S r|   r[  r   r   s    rr   r   zDtypeView.get_size
  r  rs   c                J      j                   j                          fd}|S )Nc                z    t        j                   |       j                  j                  j                        S r|   )rN   r  r/  r[  r   )r   r  r   s    rr   r  z%DtypeView.make_loader.<locals>.loader
  s*    ''c
D4E4EtyyWWrs   )r[  r  )r   r  r  s   ` @rr   r  zDtypeView.make_loader
  s"    		%%'	X rs   NrT  )r   r%  r&  r,  r'  r  r  rm  r  r+  r   r   r  rw   rs   rr   r.  r.  
  sE    2' '? H! !$rs   r.  c                  .    e Zd Zed        Zedd       Zy)	SliceViewc                     t         j                  j                  |j                         |   t	        d ||fD              rd nfd fd} ||dd      } |||      }||fS )zz
        Normalize start and end such that both are in the range
        [0, x.get_size()[dim]] and start <= end.
        c              3  2   K   | ]  }t        |        y wr|   ra  rG  r   s     rr   rI  z0SliceView.normalize_start_end.<locals>.<genexpr>
  s     HA$Q'Hrd  c                V    t        j                  t        j                  | |      |      S r|   )rh   MinMax)r   loweruppers      rr   clampz,SliceView.normalize_start_end.<locals>.clamp
  s    yy1e!4e<<rs   c                H    j                  j                  | |      |      S r|   )evaluate_minevaluate_max)r   r@  rA  r   s      rr   rB  z,SliceView.normalize_start_end.<locals>.clamp
  s#    ,,X-B-B1e-LeTTrs   c                D    | |S j                  |       }  | ||      S r|   )r  )r  r@  rA  r  rB  r  dim_sizes       rr   
clamp_wrapz1SliceView.normalize_start_end.<locals>.clamp_wrap
  s.    {++C:CeU++rs   r   )rP   r   r   r   r  )	r  r   r	  startendrH  rB  rG  r   s	   `     @@@rr   normalize_start_endzSliceView.normalize_start_end
  su     77##::<$H%h1GHH=
U	, 5!Xq1eXx8czrs   c           	        t        j                        t        t         j                        sdkD  sJ 	 dk(  r|dk\  rdk(  r|S t
        j                  j                  }t        |j                               |r| j                  ||      \  }t        |z
  dz
  z         <   t        |      rzt        |      \  }}	t        |	j                        }
|
   z  |
<   t        |	j                   |	j"                  |
|	j$                  |	j                     z  z         }t'        ||      S fd}t)        ||      S # t        $ r Y w xY w)Nr   l    r2   c                    t        |       t              k(  sJ d|  d        t        |       } |    z  z   | <   | S )Nzwrong ndim r  )r   r^   )r   r	  r  rI  steps    rr   r   z!SliceView.create.<locals>.reindex
  sP    u:X.P+eWAhZ0PP.KEsd*U2E#JLrs   )r   r   )rh   r  r]   r   	TypeErrorrP   r   r   r^   r   rK  r/   r   rf  r   r  r   r   r  rz  r9  )r  r   r	  rI  rJ  rN  rB  r   r  r  r  r  r   r  s     `` `       @rr   r  zSliceView.create
  sY   ||D!$

+tax77	zcY.419 77##

%
 00CDJE3 uq!94@ #"7":GZj//0J(o4JsO$!!  !!J$5$5c$:U$BBJ #7J77	 7;;G  		s   E 	EEN)r2   T)r   r%  r&  r  rK  r  rw   rs   rr   r9  r9  
  s(     8 )< )<rs   r9  c                  L    e Zd ZU ded<   ded<   d Zd Zd Zd Zd	 Zd
 Z	d Z
y)BaseConstantr  r   rZ  r   c                     yNrw   rw   r   s    rr   r   zBaseConstant.get_size
      rs   c                    | j                   S r|   ru  r   s    rr   r   zBaseConstant.get_device   rv  rs   c                     y r|   rw   r   s    rr   r5  zBaseConstant.get_origin_node  r  rs   c                     y r|   rw   r  s     rr   r   zBaseConstant.mark_reuse  r%  rs   c                     yr;  rw   r   s    rr   r  z#BaseConstant.has_exceeded_max_reads	  r=  rs   c                     yrS  rw   r   s    rr   r  zBaseConstant.get_reads  rT  rs   c                     yr;  rw   r   s    rr   r<  zBaseConstant.is_extern  r=  rs   N)r   r%  r&  r'  r   r   r5  r   r  r  r<  rw   rs   rr   rQ  rQ  
  s1    rs   rQ  c                  >    e Zd ZU ded<   ded<   ded<   d Zd Zd	 Zy
)Constantr   r   r  r   rZ  r   c                      fd}|S )Nc                X    t        j                  j                  j                        S r|   )rN   r  r   r   r   r   s    rr   r  z$Constant.make_loader.<locals>.loader  s    <<

DJJ77rs   rw   r  s   ` rr   r  zConstant.make_loader  s    	8 rs   c                     y r|   rw   r   s    rr   r  zConstant.realize  r%  rs   c                D    t        | j                  | j                  |      S r|   )r\  r   r   r  s     rr   r  zConstant.constant_to_device"  s    

DJJ77rs   N)r   r%  r&  r'  r  r  r  rw   rs   rr   r\  r\    s"    J8rs   r\  c                  8    e Zd ZU ded<   ded<   ded<   d Zd Zy	)
IndexingConstantr   r   r  r   rZ  r   c                      fd}|S )Nc                X    t        j                  j                  j                        S r|   )rN   r  r   r   r_  s    rr   r  z,IndexingConstant.make_loader.<locals>.loader-  s    >>$**djj99rs   rw   r  s   ` rr   r  zIndexingConstant.make_loader,  s    	: rs   c                D    t        | j                  | j                  |      S r|   )rc  r   r   r  s     rr   r  z#IndexingConstant.constant_to_device2  s    

DJJ??rs   N)r   r%  r&  r'  r  r  rw   rs   rr   rc  rc  &  s    J@rs   rc  c           	     b    t        d t        | t        j                  |      |      D              S )Nc              3  <   K   | ]  \  }}}|d k(  xs ||k(    ywrp  rw   )rG  leftrightr   s       rr   rI  z2is_contiguous_strides_for_shape.<locals>.<genexpr>9  s.      D% 		"TU]"   )rd  r   r   r   )r   r  s     rr   is_contiguous_strides_for_shaperl  6  s5      !$N55e<e"
  rs   c                <    t         j                  | j                  z  S r|   )r3   padding_alignment_bytesitemsizer  s    rr   get_align_for_dtyperp  A  s    ))U^^;;rs   c                      e Zd Z ed      f	 	 	 	 	 	 	 	 	 ddZed        ZddZeZd Z	e
d        Zd Zd Zd	 Ze
d
        Zd Zd Zd Zd ZddZddZy)r
  r   c                    |$t        |      t        |      k(  sJ d| d|        || _        || _        t        d |D              sJ || _        || _        || _        y )Nr  	, stride=c              3  H   K   | ]  }t        |t        t        f        y wr|   )r]   r   rl   rq  s     rr   rI  z"Layout.__init__.<locals>.<genexpr>T  s     <!:a$-<    ")r   r   r   rd  r   _strider  )r   r   r   r   r   r  s         rr   r  zLayout.__init__G  sv     ~Tc/
 "
 	+4&	&*	+ 
 
<t<<<<	rs   c                    | j                   S r|   )rv  r   s    rr   r   zLayout.strideY  s    ||rs   c                    d}| j                   dk7  rd| j                    }t        |       j                   d| j                  j                   d| j                   d| j
                   d| j                   | dS )	Nr   r   z	, offset=z('z', z, size=rs  r   )r  rn   r   r   r   r   r   )r   r  s     rr   rm  zLayout.__str__]  sw    ;;! .FDz""#2dkk&6&6%7s4::, GII;i}VHA?	
rs   c                B    t        | j                  | j                        S r|   )rl  r   r   r   s    rr   rk  zLayout.is_contiguoush  s    .t{{DIIFFrs   c                    t        |       }|dvs| d   dk(  ryt        |t        |       |       D ]  \  }}}|dk7  s||k7  s y y)N)r      r2   FT)r   r   r"   )r  r  ndimri  rj  r   s         rr   is_channels_last_contiguousz"Layout.is_channels_last_contiguousk  s_    5zvqQ!$3E:E"
 	D% qyTU]		
 rs   c                    t        | j                  t        t        j	                  t        t        | j                                          | j                        D ]  \  }}}|dk7  s||k7  s y y)Nr2   FT)r   r   reversedr   r   r^   r   )r   ri  rj  r   s       rr   is_transposedzLayout.is_transposedw  sc    !$KK^66tHTYY<O7PQRII"
 	D%
 qyTU]	 rs   c                   t        | j                        t        |      k(  sJ t        | j                        D cg c]5  \  }}t        j
                  j                  j                  |d      dk7  r|7 }}}|D cg c]  }| j                  |    }}|D cg c]  }||   	 }}d } ||      }dgt        |      z  }t        t        |            D ]4  }t        j
                  j                  j                  ||         |||   <   6 t        t        |      dz
        D ]  }||   ||dz      kD  s y yc c}}w c c}w c c}w )Nr   r  r2   c                `    t        |       }| D cg c]  }|j                  |       c}S c c}w r|   )rc  r   )arr
sorted_arrelements      rr   sorted_indicesz0Layout.is_stride_ordered.<locals>.sorted_indices  s*    J=@A'J$$W-AAAs   +rV  FT)	r   r   r   r   rP   r   r   r   r   )r   r   r   r	  r
  r   r  stride_ordereds           rr   r|  zLayout.is_stride_ordered  sM   4;;3u:---
 $DII.
3ww))#):a? 
 
 +88Q$++a.88#01aq11	B
 u% E
*s5z" 	MA'(ww'7'7'A'A&)'LN58$	M s5zA~& 	Aa >!a%#88	 1
 91s   :D9=D?Ec                    dgt        t        t        dt        | j                        dz
                    z   }t        |      g|z   }| j                  |      S r  )r^   r  r   r   r   r|  r   r   s     rr   is_channels_last_stride_orderedz&Layout.is_channels_last_stride_ordered  sN    d8E!S-=-A$BCDDUu$%%e,,rs   c                   t        |      }t        |       dk(  r| S t        j                  st        j                  ||       r| S t        j                         }t        |d      r|j                  j                  dd      r| S t        d t        j                  | |      D              s| S t        |       }t        |      }t!        t        |             D cg c]  }d }}d||d   <   d}	t#        |dd d      D ]I  \  }
}||
dz
     }||   ||   z  }|t        j$                  kD  r||z  dk7  rt'        ||      |z  }d	}	|||<   K |	s| S t(        xj*                  dz  c_        |S c c}w )
z
        The padding does not change stride order but makes sure all strides larger
        than the threshold are multiple of align.
        r   metadislike_paddingFc              3  \   K   | ]$  }t        |t        t        j                  f       & y wr|   r'  rq  s     rr   rI  z&Layout._pad_strides.<locals>.<genexpr>  s(      
 q3./
s   *,r2   N)rI  T)rp  r   r3   pad_channels_lastr
  r}  rP   get_current_noder4  r  getrd  r  chainr   r   r   r   padding_stride_thresholdrB   r   num_comprehensive_padding)
in_stridesr   r   aligncurrent_fx_noderq  r   r   new_stridespaddedrankr   prev_idxr   s                 rr   _pad_strideszLayout._pad_strides  s    $E*z?a''F,N,N*-
 ,,.?F+0D0D0H0Hu1
   
__Z6
 
 '
3,\:
"'J"89Qq99 &'JqM"":ab>; 	&ID#!$(+H *T(^;F777FUNa<O /%7%K	&  ))Q.)- :s   	E&c                    t        | t              sJ | j                  J | j                  | j                  | j                  | j
                        | _        y r|   )r]   r   rv  r  r   r   r   s    rr   rj  zLayout.pad_strides  sD    $///||'''((tyy$**Mrs   c                F    t         j                  xr t        | t              S r|   )r3   comprehensive_paddingr]   r   r   s    rr   ri  zLayout.should_pad_strides  s    ++P
40PPrs   c                    t        | t              r| S | j                         r| j                          t        | j                  | j
                  | j                  | j                  | j                        S r|   )	r]   r  ri  rj  r   r   r   r   r  r   s    rr   as_fixedzLayout.as_fixed  sY    dK(K""$KKJJIIKKKK
 	
rs   c                    t         j                  sJ dt        |       j                   d       | j	                         j                         S )Nzconvert z to FixedLayout first)r   r  rn   r   r  r  r   s    rr   r  zLayout.make_indexer  sG    ))	Ad4j))**?@	A)}}++--rs   c                   | j                   |j                   k(  xrj | j                  |j                  k(  xrO | j                  |j                  k(  xr4 | j                  |j                  k(  xr | j                  |j                  k(  S r|   r   r   r   r   r  )r   others     rr   __eq__zLayout.__eq__  so    KK5<<' ,

ekk),		UZZ', u||+, u||+	
rs   c                X    t        | j                  | j                  | j                        S r|   )r   r   r   r  r   s    rr   storage_sizezLayout.storage_size  s    .tyy$++t{{SSrs   N)
r   rZ  r   r  r   r]  r   $Optional[Sequence[Union[Expr, int]]]r  r   rT  )ru   r   ru   z
sympy.Expr)r   r%  r&  r   r  r+  r   rm  r  rk  r(  r}  r  r|  r  r  rj  ri  r  r  r  r  rw   rs   rr   r
  r
  E  s     qz  	
 5 $  
 HG 	 	<- 8 8tN
Q
.
Trs   r
  c                  N     e Zd ZdZd ed      f	 	 	 	 	 	 	 	 	 d fdZd Z xZS )r  z A Tensor layout we cannot changeNr   c                Z    |t         j                  |      }t        |   |||||       y r|   )r   r   rp  r  )r   r   r   r   r   r  rr  s         rr   r  zFixedLayout.__init__  s6     >#66t<F	
rs   c                      fd}|S )z1A closure containing math to read a given elementc                   t        |       t        j                        k(  sJ t        |       t        j                        k(  sJ j                  }t	        | j                  j                        D ]  \  }}}|dk7  s|||z  z   } |S r  )r   r   r   r  r   )r   r"  r   r   szr   s        rr   r  z)FixedLayout.make_indexer.<locals>.indexer!  s    u:T[[!1111u:TYY///[[F#&udkk499#E 3VR7#cFl2F3 Mrs   rw   r   r  s   ` rr   r  zFixedLayout.make_indexer  s    	 rs   )
r   rZ  r   r  r   zUnion[List[Expr], List[int]]r   r  r  zUnion[Expr, int])r   r%  r&  r,  r   r  r  r  r  s   @rr   r  r  	  sL    * 8<#*1:

 
 +	

 5
 !
$rs   r  c                       e Zd ZdZdZed        Zed        Zed        Zed        Z	ed        Z
ddZdd	Zd
 Zd Zd fd	Z xZS )r   z(A Tensor layout we are allowed to changeFc                    t        |       dk(  rg S t        j                  d      g}t        | dd        D ]  }|j	                  ||d   z          t        t        |            S )Nr   r2   rV  )r   rh   r   r  re  r^   )sizesreversed_stridesr   s      rr   r   z!FlexibleLayout.contiguous_strides3  sj    u:?I!MM!,-U12Y' 	AD##D+;B+?$?@	AH-.//rs   c                    t        t        t        |                   t        |      k(  s	J | |f       t        j                  d      }dgt        |      z  }|D ]  }|||<   || |   z  } |S )z
        Create a stride based on the order the dimensions should be filled in.

        In this format, channels last would be:
            [1, 3, 2, 0]
        r2   N)r-   r   r   rh   r   )r  r   next_strider  r   s        rr   fill_orderedzFlexibleLayout.fill_ordered<  sy     %E
+,
50AAQE5>QAmmA&&3u:% 	1A$GAJ%a0K	1 rs   c                    t        t        t        |                   t        |      k(  sJ t        |      }t        j                  | |      S )z
        Create a stride based on the sorted order of a permuted range.

        In this format, channels last would be:
            [3, 0, 2, 1]
        )r-   r   r   r   r   r  )r  r   r   s      rr   r  zFlexibleLayout.stride_orderedM  sB     %E
+,
50AAAA,U3
**5*==rs   c                >   |t         j                  k(  rt        j                  | t              S |t         j
                  k(  rt        j                  | t              S |t         j                  k(  rt        j                  |       S t        j                  d|       t        )aq  
        Create a stride based on a memory format.

        Memory format is translasted into a stride order,
        so channels_last is the same as:
            FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

        This interface does not support memory_format `torch.preserve_format`
        which should be used to deduce a format from another source
        z>stride_ordered_for_memory_format, unsuppored memory_format: %s)rb   channels_lastr   r  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr   r|  r}  r   )r  memory_formats     rr    stride_ordered_for_memory_formatz/FlexibleLayout.stride_ordered_for_memory_formatY  s     E///!008IJJe444!008JKKe555!44U;;IIP &%rs   c                (   t        |       t        |      k(  sJ |D cg c]+  }t        j                  j                  j	                  |      - }}t        t        t        |            |j                        }t        j                  | |      S c c}w )z
        Create a stride that has the same stride order as given stride

        For example, if given stride is [1000, 1, 100, 10],
        the fill order should be [1, 3, 2, 0]
        r5  )
r   rP   r   r   r   rc  r   __getitem__r   r  )r  r   r   r   s       rr   same_orderedzFlexibleLayout.same_orderedr  sv     5zS[(((9?@A!''"",,Q/@@E#f+.F4F4FG
**5*== As   0Bc                   | j                  | j                  |      }| j                         r)|r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                        S r|   )r  r   ri  r  r   r  r   r  )r   r   rr  r  s       rr   as_stride_orderzFlexibleLayout.as_stride_order  sn    ((E:
""$**:tyy$**MJKKJJIIKK
 	
rs   c                    |}| j                         r)|r'| j                  || j                  | j                        }t	        | j
                  | j                  | j                  || j                        S r|   )ri  r  r   r   r  r   r  )r   rs  rr  r  s       rr   as_exact_strideszFlexibleLayout.as_exact_strides  s]    "
""$**:tyy$**MJKKJJIIKK
 	
rs   c                   | j                  | j                  |      }| j                         r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                        S r|   )r  r   ri  r  r   r  r   r  )r   r   r  s      rr   as_fill_orderzFlexibleLayout.as_fill_order  sl    &&tyy%8
""$**:tyy$**MJKKJJIIKK
 	
rs   c                   | j                  | j                  |      }| j                         r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                        S r|   )r  r   ri  r  r   r  r   r  )r   r   r  s      rr   as_same_orderzFlexibleLayout.as_same_order  sl    &&tyy&9
""$**:tyy$**MJKKJJIIKK
 	
rs   c                    |rt         j                  ||      }nt         j                  |      }t        |   ||||       y r|   )r   r  r   rp  r  )r   r   r   r   rq  r  rr  s         rr   r  zFlexibleLayout.__init__  s;    $11$EG$77=Gg6rs   Fr|   )r   r%  r&  r,  r  r(  r   r  r  r  r  r  r  r  r  r  r  r  s   @rr   r   r   -  s    2N 0 0    	> 	> & &0 
> 
>





7 7rs   r   c                  0     e Zd ZdZd fdZd Zd Z xZS )NonOwningLayoutz,Is a view into the storage of another tensorc                    |j                         }t        | 	  |j                  |j                  |j
                  |j                         || _        y r|   )r   rp  r  r   r   r   r   view)r   r  rZ  rr  s      rr   r  zNonOwningLayout.__init__  sA    "MMLLKKMM		
 	rs   c                >    | j                         j                         S r|   )r  r  r   s    rr   r  zNonOwningLayout.make_indexer  s    }}++--rs   c                    | j                   j                         j                  }|dk(  ryddlm} t
        j                  j                  j                  ||      S )Nr   Tr2   )	ALIGNMENT)	r  r   r  utilsr  rP   r   r   statically_known_multiple_of)r   r  r  s      rr   maybe_guard_alignedz#NonOwningLayout.maybe_guard_aligned  sD    %%'..Q;$ww<<VYOOrs   )r  zUnion[BaseView, TensorBox])r   r%  r&  r,  r  r  r  r  r  s   @rr   r  r    s    6.Prs   r  c                      e Zd Zd Zd Zd Zy)
NoneLayoutc                2    || _         dg| _        dg| _        y r  )r   r   r   r  s     rr   r  zNoneLayout.__init__  s    C	crs   c                     yr  rw   r   s    rr   r  zNoneLayout.storage_size  r+  rs   c                    | S r|   rw   r   s    rr   r  zNoneLayout.as_fixed      rs   N)r   r%  r&  r  r  r  rw   rs   rr   r  r    s    
rs   r  c                       e Zd Zd	 fdZej
                  j                  d        Zd
dZddZd Z	e
dd       Zd Zd Z xZS )MutationLayoutSHOULDREMOVEc                   t         |   |j                         |j                         |j	                         d        || _        | j                         j                         }t        j                  j                  |       y r|   )rp  r  r   r   r   r  
get_bufferr  rP   r   mark_buffer_mutated)r   r  r   rr  s      rr   r  z#MutationLayoutSHOULDREMOVE.__init__  se    OO		
  ))+	##D)rs   c                6    | j                         j                  S r|   )real_layoutr   r   s    rr   r   z!MutationLayoutSHOULDREMOVE.stride  s    !(((rs   c                >    | j                         j                         S r|   )r  r  r   s    rr   r  z'MutationLayoutSHOULDREMOVE.storage_size  s    !..00rs   c                d    fd | j                         }t        |t              sJ d       |S )Nc                    t        | t              r | j                        S t        | t              r | j	                               S t        | t
              r | j                        S | S r|   )r]   r  r  r  r  
MutableBoxr[  )r  unwrap_viewss    rr   r  z;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_views  sY    &"<=#FMM22&(+#F$6$6$899&*-#FKK00Mrs   z1MutationLayoutSHOULDREMOVE must refer to a buffer)r  r]   rv  )r   r"  r  s     @rr   r  z%MutationLayoutSHOULDREMOVE.get_buffer  s>    	 dkk*F
 	?>	? 
 rs   c                6    | j                         j                  S r|   )r  rZ  r   s    rr   r  z&MutationLayoutSHOULDREMOVE.real_layout
  s     '''rs   c                   |j                          t        j                  j                  |j	                                t        |t              r|j                  }|j                          |st        j                  |j                         |j                         |j                         t        |j                         |j                               D cg c]/  \  }}t        j                  j                   j#                  ||      1 c}}      j                  }|j                          t        |j                  j$                  t&              sJ t)        |      |j                  _        |j                  S c c}}w )Nr  )r  rP   r   r  r  r]   rX   r[  r!  r  r  r   r   r  r   r   r   r  rZ  r   r  )r  srcdstunsafe_aliasr  r  s         rr   realize_intoz'MutationLayoutSHOULDREMOVE.realize_into  s    	
##CLLN3c9%((C 	""~~'mmo* !$CLLNCLLN C1 GG$$11!Q7	 #  d  	#((//>:::4S9xxs   4E6c                    | S r|   rw   r   s    rr   r  z#MutationLayoutSHOULDREMOVE.as_fixed0  r  rs   c                6    | j                   j                         S r|   )r  r  r   s    rr   r  z'MutationLayoutSHOULDREMOVE.make_indexer3  r!  rs   )r  rY   r  )ru   rv  r  )r   r%  r&  r  r
  r   getterr  r  r  r  r  r  r  r  r  s   @rr   r  r    sT    	* ]]) )1 (    D*rs   r  c                       e Zd ZU ded<   ded<    fdZd Zd"dZd Zd	 Zd#d
Z	e
d        Zd Zd Zd Zd Zd Zd Zd Zd$dZd Zd Zd$dZd Zd Zd%dZd Zd Zd Zd&dZd'dZd'dZd  Z d! Z! xZ"S )(rv  r  r   r
  rZ  c                0    t         |           d | _        y r|   ro  rq  s    rr   r   zBuffer.__post_init__A  rs  rs   c                6    | j                   j                         S r|   r   r   s    rr   r  zBuffer.make_indexerE  r!  rs   c                @    | j                   sJ |        | j                   S r|   rF  r   s    rr   r  zBuffer.get_nameH  s    yy$yyyrs   c                .    | j                   j                  S r|   r  r   s    rr   r   zBuffer.get_deviceL  r  rs   c                    | j                   S r|   rx  r   s    rr   r5  zBuffer.get_origin_nodeO  ry  rs   c                     y r|   rw   r   s    rr   r   zBuffer.get_defining_opR  r  rs   c                0    t        | j                  dd       S )Nr   )r}   rZ  r   s    rr   r   zBuffer.dtypeU  s    t{{GT22rs   c                @    t        | j                  j                        S r|   r  r   s    rr   r   zBuffer.get_sizeY  r  rs   c                @    t        | j                  j                        S r|   r  r   s    rr   r  zBuffer.get_stride\  r  rs   c                .    | j                   j                  S r|   )rZ  r  r   s    rr   
get_offsetzBuffer.get_offset_  r  rs   c                    | j                   S r|   r#  r   s    rr   r   zBuffer.get_layoutb  rv  rs   c                "    | j                         S r|   )r	  r   s    rr   r  zBuffer.get_storage_numele  s    ~~rs   c                     yr;  rw   r   s    rr   r<  zBuffer.is_externh  r=  rs   c                    t        | j                  t        t        f      s | j                  j	                         | _        y y r|   )r]   rZ  MultiOutputLayoutr  r  r   s    rr   rw  zBuffer.freeze_layoutk  s0    $++(9?'KL++..0DK Mrs   c                    t        | j                  t              sJ | j                  j                  ||      | _        y Nrt  )r]   rZ  r   r  )r   r   rr  s      rr   rx  z&Buffer.freeze_layout_with_stride_ordero  s1    $++~666kk11%}1Urs   c                |    t        | j                  t              sJ | j                  j                  |      | _        y r|   )r]   rZ  r   r  r  s     rr   freeze_layout_with_fill_orderz$Buffer.freeze_layout_with_fill_orders  s,    $++~666kk//6rs   c                |    t        | j                  t              sJ | j                  j                  |      | _        y r|   )r]   rZ  r   r  )r   r   s     rr   freeze_layout_with_same_orderz$Buffer.freeze_layout_with_same_orderw  s,    $++~666kk//7rs   c                    t        | j                  t              sJ | j                  j                  ||      | _        y r  )r]   rZ  r   r  )r   rs  rr  s      rr   ry  z'Buffer.freeze_layout_with_exact_strides{  s6    $++~666kk22 3 
rs   c                    t         j                  j                  j                  t	        j
                  | j                         d            S r  r  r   s    rr   r  zBuffer.is_zero_elements  r  rs   c                p      j                         rt        t         j                               S  fd}|S )Nr  c                    j                   j                         }t        j                  j                   ||             S r|   )rZ  r  rN   r  r   r   r  r   s     rr   r  z"Buffer.make_loader.<locals>.loader  s.    kk..0G88DIIwu~66rs   )r  r   r  r   r  s   ` rr   r  zBuffer.make_loader  s/      "=0@AA	7 rs   c                "    | j                         S r|   r  r  s     rr   r  zBuffer.codegen_reference  r  rs   c                     y r|   rw   r   s    rr   rg  zBuffer.decide_layout  r%  rs   c                    t        | j                  t              r%| j                  j                  j	                         gS yrS  )r]   rZ  r  r  r  r   s    rr   get_inputs_that_alias_outputz#Buffer.get_inputs_that_alias_output  s/    dkk?3KK$$--/00rs   c                    t        | j                  t              r%| j                  j                  j	                         gS yrS  )r]   rZ  r  r  r  r   s    rr   get_mutation_nameszBuffer.get_mutation_names  s0    dkk#=>KK&&//122rs   c                6    t        | j                         g      S r|   )r-   r  r   s    rr   r   zBuffer.get_read_names  s    4==?+,,rs   c                    t               S r|   r,   r   s    rr   r"  zBuffer.get_unbacked_symbol_uses  rP  rs   c                    t               S r|   r,   r   s    rr   rO  zBuffer.get_unbacked_symbol_defs  rP  rs   c                     y r|   rw   r   s    rr   r  zBuffer.realize  r%  rs   c                     yr;  rw   r   s    rr   should_allocatezBuffer.should_allocate  s    rs   rT  )ru   zOptional[Operation]r  r|   r#  rW  )#r   r%  r&  r'  r   r  r  r   r5  r   r+  r   r   r  r  r   r  r<  rw  rx  r  r	  ry  r  r  r  rg  r  r  r   r"  rO  r  r  r  r  s   @rr   rv  rv  7  s     N
 *"  3 3&(" 1V78
W	

-rs   rv  c                  "    e Zd ZddZddZd Zy)OperationBufferc                    | gS r|   rw   r   s    rr   rM  zOperationBuffer.get_outputs  s	    vrs   c                    | S r|   rw   r   s    rr   r   zOperationBuffer.get_defining_op  r  rs   c                X    t         j                  |        t        j                  |        y r|   )rv  r   r-  r   s    rr   r   zOperationBuffer.__post_init__  s    T"%rs   NrU  ru   r-  )r   r%  r&  rM  r   r   rw   rs   rr   r  r    s    &rs   r  c                      e Zd Zd Zy)InputBufferc                     yr  rw   r   s    rr   r  zInputBuffer.num_reads  r+  rs   N)r   r%  r&  r  rw   rs   rr   r#  r#    s    rs   r#  c                  (    e Zd ZU dZded<   d Zd Zy)r  NzOptional[torch.device]r  c                      fd}|S )Nc                    j                   j                         }t        j                  t        j
                  j                  j                         j                         ||             S r|   )	rZ  r  rN   r  rP   r   constant_namer  r  r  s     rr   r  z*ConstantBuffer.make_loader.<locals>.loader  sM    kk..0G88%%dmmot7K7KL rs   rw   r  s   ` rr   r  zConstantBuffer.make_loader  s    	 rs   c                    t        t        j                  j                  | j	                         |      | j
                        S r|   )r  rP   r   r(  r  rZ  r  s     rr   r  z!ConstantBuffer.constant_to_device  s/    GG!!$--/6:DKK
 	
rs   )r   r%  r&  r  r'  r  r  rw   rs   rr   r  r    s    .2O+2
rs   r  c                      e Zd ZddZddZy)NoneAsConstantBufferc                    t               S r|   r,   r   s    rr   r"  z-NoneAsConstantBuffer.get_unbacked_symbol_uses  rP  rs   Nc                J    t         j                  j                  j                  S r|   )rP   r   r(  none_strr  s     rr   r  z&NoneAsConstantBuffer.codegen_reference  s    ww##,,,rs   rW  r|   )r   r%  r&  r"  r  rw   rs   rr   r+  r+    s    -rs   r+  c                  >     e Zd Z fdZed        ZddZddZ xZS )ShapeAsConstantBufferc                0    t         |           || _        y r|   )rp  r  _shape)r   r  rr  s     rr   r  zShapeAsConstantBuffer.__init__  s    rs   c                    | j                   S r|   )r2  r   s    rr   r  zShapeAsConstantBuffer.shape  s    {{rs   c                ,    t        | j                        S r|   )r(   r  r   s    rr   r"  z.ShapeAsConstantBuffer.get_unbacked_symbol_uses  s    $TZZ00rs   c                    t         j                  j                  j                  t         j                  j                  j                  | j                              S r|   )rP   r   r(  expr_printerr   r  r  r  s     rr   r  z'ShapeAsConstantBuffer.codegen_reference  s6    ww##001A1A1J1J4::1VWWrs   rW  r|   )	r   r%  r&  r  r+  r  r"  r  r  r  s   @rr   r0  r0    s'      1Xrs   r0  c                       e Zd ZU ded<   d Zd ZddZd ZddZ fdZ	d	 Z
d
 Zd Zed        Z	 	 d	 	 	 ddZe	 dd       Zd Zd Zd Zd Zd Z xZS )r`  rY  r[  c                    | j                   | j                   S t        | j                  d      r| j                  j                   S y)z
        Returns self.name if it exists, otherwise returns the name of the data node if that exists.
        If neither exist, returns None.
        Nr   )r   r4  r[  r   s    rr   get_computed_buffer_namez'ComputedBuffer.get_computed_buffer_name  s7    
 99 99499f%99>>!rs   c                6    | j                   j                         S r|   r[  r  r   s    rr   r  zComputedBuffer.num_reads  r  rs   c                6    | j                   j                         S r|   r  r   s    rr   r   zComputedBuffer.get_read_names   r  rs   c                   t        j                  t        dd      5  | j                  j	                         rTt        | j                         | j                  j                         | j                  j                               cd d d        S t        | j                         | j                  j                               cd d d        S # 1 sw Y   y xY wr  )
r   r   r   r[  r  r9   get_store_functionr}  r  r   r   s    rr   rA  zComputedBuffer.get_read_writes  s    \\.*:DA 	yy++-*++-II002II002	 	 +++-II&&(	 	 	s   A%C1CCc                    t        | j                               t        | j                               z  t        | j                               z  | j                  j                         z  S r|   )r(   r   r  r  r[  r"  r   s    rr   r"  z'ComputedBuffer.get_unbacked_symbol_uses  sV    & "$--/2#DOO$567#DOO$567 ii0023	
rs   c                    t        | j                  d      rS| j                  t        j                  j
                  vr-| j                         dk(  r| j                  j                         S t        |          S )Nr  r   )	r4  r[  r   rP   r   mutated_buffersr  r  rp  rq  s    rr   r  zComputedBuffer.make_loader*  s[     DII}-		!8!88 A% 99((**w"$$rs   c                   | j                   j                         j                         }t        | j                  t
        t        t        f      r+t        | j                  j                  | j                  |      S t        | j                  t              sJ t        | j                  j                  | j                  |      S r|   )rZ  r  r  r]   r[  r  r  rH  r   r  r   r  r  r  s     rr   r>  z!ComputedBuffer.get_store_function5  s    ++&&(557dii)T4!8949944diiIIdii33349911499gFFrs   c                F   t        | j                  t              rvt        j                  | j
                  j                         | j
                  j                               \  \  }}}| j                         j                  }t        d |D              sJ |D cg c]Z  }t        |t        j                        r>t        |j                  |D ci c]  }|dk7  s	|t        j                  d        c}      \ }}}|rt        | j
                  t         t"        f      r| j
                  j%                  ||      }n|}|D cg c],  }t&        j(                  j*                  j-                  ||      . }	}ddlm}
  |
|	| j3                               S yc c}w c c}}w c c}w )al  
        If our layout is still flexible, try to determine the stride order based on stride orders of reads.

        TODO(jansel): A better algorithm here would look at downstream consumers of this
                      value and try to do global graph-level layout optimization.
                      This is also something just begging to be autotuned.
        c              3  p   K   | ].  }t        |t        j                  t        j                  f       0 y wr|   )r]   r4   StarDep	MemoryDepr  s     rr   rI  z0ComputedBuffer.get_fill_order.<locals>.<genexpr>L  s0       1|33\5K5KLMs   46r   r2   pick_loop_orderN)r]   rZ  r   r4   r~  r[  r}  r  rA  rK  rd  rF  rM   r   rh   r   r  rH  r   rP   r   r   r  	schedulerrH  r   )r   
index_varsr  r   rK  r  vrj  exprstride_lengthsrH  s              rr   get_fill_orderzComputedBuffer.get_fill_order=  so    dkk>2.:.M.M		,,.		0L0L0N/+(Z! ((*00E      	 a!7!78	 GG>TaQRSVaq!11TE  dii$6"ii//
NKG(GMR"EIAGG$$11$@" " 7&~t}}GG% U"s$   3F
FF1	F1FFc                    t        | j                  t              r5| j                         }|r| j	                  |       y | j                          y y r|   )r]   rZ  r   rN  r  rw  r  s     rr   rg  zComputedBuffer.decide_layoutf  s@    dkk>2'')E2259""$ 3rs   c                z   t        j                  | j                  j                         | j                  j	                         d      \  }}t        j                  t        d| j                               5  t        | j                         | j                         r|n|d d |g| }d d d        g }g }g }g }|j                         D ]^  \  }}	||d   v r'|rJ |j                  |       |j                  |	       4||d   v sJ |j                  |       |j                  |	       ` ||f||ffS # 1 sw Y   xY w)NqrV   r  r2   r   )r4   r~  r[  r}  r  r   r   r  r   r;   r>  r  itemsre  )
r   r~   
var_rangesr  rJ  reduce_vars
index_sizereduce_sizerK  r   s
             rr   get_default_sizes_bodyz%ComputedBuffer.get_default_sizes_bodyn  sG   '::II((*DII,H,H,JSV
j \\.*;T__=NO 	'')002Ra 	D	 
!#
$$& 	&DAqDG|&&!!!$!!!$DG|#|""1%""1%	& K($[0III)	 	s   52D11D:c                     j                         \  \  }}}\  }}|r |||f|||f      \  \  }}}\  }}g |j                  j                         |t        |t              rt        |      dk(  sJ |\  }}	t        |t              sJ t        |	t              sJ t        d |	D              sJ |j                  }
|
|k(  s	J |
|f       |	D cg c]	  }|vs| }	}|	z  g |j                         t        j                  j                   t        j                        sj!                  |j#                                 fd}||z   } j%                         j&                  dk7  xs t(        j*                   } |||||      \  }}} |||||      \  }}}t-        j.                  ||d      \  \  }}}t1        | ||       ||      g|||      }||f|fS c c}w )an  
        This is a main place where we do loop transformations in a
        backend-agnostic way.

        Here we:
            1) Remove any 1 dimensions
            2) Fuse contiguous dimensions together
            3) Reorder dimensions based on stride orders

        Optional argument extra_indexing_constraints can be used to append additional
        indexing expressions to existing ones derived from buffer's body. This can be useful
        to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
        on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
        the scheduler node compatible with other nodes.
        Optional argument recompute_sizes_body_func can be used to recompute sizes and body
        on the default body. This can be useful to append additional loop transformations.
        r   c              3  <   K   | ]  }t        |t                y wr|   )r]   r   )rG  fs     rr   rI  z6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>  s     Hqz!T*Hrk  c           	         j                  | ||
      \  }}} ||       } |rGt        j                  j                  j	                  | |t        	| |            \  }}}t        ||      }n|}|||fS r|   )_apply_loop_reorderingrP   r   r   _simplify_loopsr6   r   )x_varssupport_varsr  simplify_loopsreindex0r   r   pruner   index_formulasmemory_addrsr   s            rr   simplify_and_reorderzAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reorder  s    (,(C(Ce\)%E8X f%F)*)9)9)I)I,^VUK*&x
 *(H="'8++rs   cudazrV   )rW  indexing_exprsra   r]   r_   r   r`   r^   rd  rS  get_write_exprsrP   r   rt  r5   PREFER_STORE_LOOP_ORDERextendget_read_exprsr   rn   r3   loop_ordering_after_fusionr4   index_vars_no_squeezer;   )r   extra_indexing_constraintsrecompute_sizes_body_funcrU  rV  r  rJ  rT  extra_indexing_rangesextra_indexing_exprexpected_var_rangesrc  re  r_  should_merge_loopsiter_rangesiter_reindexr   reduce_rangesreduce_reindex	iter_varsrS  rc  rd  s   `                     @@rr   re  z#ComputedBuffer.simplify_and_reorder  se   4 '')		
%Z%Z %
 *[)4*k1J	)[)[
 94..5578%15u=23q89 :T6!#63T:::14888H4GHHHH"&//&*?? #%B ? /#!>2I# # 11N0--/0ww""4)O)OP 3 3 56	,$ "K/OO""f,UF4U4U0U 	 (<	(
$\1 ,@{4F,
(~q
 0</Q/Q0
, K*
 )$n[&AB
 ]+T11w#s   	G G c           
     X   ddl m} |g }	 |D cg c]-  }t        j                  j                  j                  || |      / }}t        |      t        |      k(  rt        |d         t        |       k(  sJ t        t         ||||                  }|D 	cg c]  }	||	   	 }}	|t#        |      t%        |      fS c c}w # t        $ rZ t        j                  r*t        j                  dt        t        | |            |       t        t!        t        |                  }Y w xY wc c}	w )zU
        Shuffle the order of loops around to hopefully improve performance.
        r2   rG  r   z%Did not simplify complex index:
%s
%s)rI  rH  rP   r   r   r  r   r^   r  	Exceptionr3   r}  r|  warningr`   r   r   r   r   )
rJ  r_  r  rd  priority_idxrH  rL  r  r   r   s
             rr   r\  z%ComputedBuffer._apply_loop_reordering  s'    	/L	, )   --dJMG  w<3|#44WQZCM :   /'5,"OPQE $))aq))l5)?5+AAA#  	,||=Z/0 
 s5z*+E	, *s*   C 2B<AC D'<C A D$#D$c                6    | j                   j                         S r|   )r[  r  r   s    rr   r  z!ComputedBuffer.get_reduction_size       yy++--rs   c                6    | j                   j                         S r|   )r[  r  r   s    rr   r  z!ComputedBuffer.get_reduction_type#  r  rs   c                6    | j                   j                         S r|   )r[  r  r   s    rr   r?  zComputedBuffer.is_no_op&  r  rs   c                     yNTrw   r   s    rr   r  zComputedBuffer.should_allocate)  r  rs   c                8    | j                   j                  |      S )r  )r[  r  r  s     rr   r  z!ComputedBuffer.constant_to_device,  s    yy++F33rs   r#  rW  NNro  z*Optional[Tuple[Dict[Any, Any], List[Any]]]rp  zOptional[Callable[..., Any]]r|   )r   r%  r&  r'  r9  r  r   rA  r"  r  r>  rN  rg  rA   rW  re  r(  r\  r  r  r?  r  r  r  r  s   @rr   r`  r`    s    
K	%*
2	%G'R% J J8 RVBFq2$Nq2 $@q2f  !B !BF..,4rs   r`  c                  Z     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
	 	 d
	 	 	 dd	Z xZS )TemplateBufferzt
    Represents a Triton (in the future other type) of template operator
    that we can fuse an epilogue onto.
    c                    t         |   d |       t        j                  |      | _        || _        t        j                  j                  |       | _	        t        j                  j                  |        y N)r   rZ  )rp  r  InputsKernelunwrap_storageinputsmake_kernel_renderrP   r   register_bufferr   register_operation)r   rZ  r  r  rr  s       rr   r  zTemplateBuffer.__init__7  sW    d62"11&9"4GG++D1		""4(rs   c                &    | j                  d      S )NT	normalize)r9   r   s    rr   rA  zTemplateBuffer.get_read_writes>  s    ''$'77rs   c                    | j                         | j                  j                         fd}t        j                  || j                         d|      }t        d | j                  D              |_        |S )Nc                ^    t        |      dk(  sJ t        j                   |       d      S )Nr   fake)r   rN   r  )r   r!  r  r   s     rr   dummyz1TemplateBuffer.extract_read_writes.<locals>.dummyE  s,    v;!###99T75>6::rs   rw   r  c              3  b   K   | ]'  }t        j                  |j                                ) y wr|   )r4   rE  r  r<  s     rr   rI  z5TemplateBuffer.extract_read_writes.<locals>.<genexpr>L  s!     Xq 4 4QZZ\ BXs   -/)	r  rZ  r  r4   r9   r   r-   r  rK  )r   r  r  depsr  r   s       @@rr   r9   z"TemplateBuffer.extract_read_writesA  sc    }}++**,	; //4==?B)
  XDKKXX
rs   c                     yr  rw   r   s    rr   r  z!TemplateBuffer.get_reduction_sizeO  r+  rs   c                     y r|   rw   r   s    rr   r  z!TemplateBuffer.get_reduction_typeR  r  rs   c                     yr;  rw   r   s    rr   r?  zTemplateBuffer.is_no_opU  r=  rs   c                     yr  rw   r   s    rr   r  zTemplateBuffer.should_allocateX  r  rs   c                *    | j                         dfd fS rS  r  )r   ro  rp  s      rr   re  z#TemplateBuffer.simplify_and_reorder[  s$      
 	
rs   r  r  )r   r%  r&  r,  r  rA  r9   r  r  r?  r  re  r  r  s   @rr   r  r  1  sM    
)8
 RVBF
$N
 $@
rs   r  c                  8     e Zd Z	 	 d	 d fdZddZddZ xZS )TritonTemplateBufferc           
        t         
|   |||       || _        || _        | g| _        |t
        j                  j                  j                  t
        j                  j                  j                  f}t        j                  j                  j                  }||v sJ d| d|        | j                  d   j                         }| xj                  |D 	cg c]  }	t!        t#        |      |	|        c}	z  c_        yyc c}	w )a  
        NOTE:[TritonTemplates with multiple outputs]
        We want the ability for TritonTemplates to output multiple tensors. Triton
        kernels have no notion of outputs and this is done by creating tensors that
        are then mutated by the kernel. Currenlty our STORE_OUTPUT codegen doesn't
        support creating multinode outputs for triton templates.
        We work around this by creating an extra input buffer during the lowering
        and we mark them as mutated inputs.
        Nz$Mutated inputs are only allowed for z	 but got r   )rp  r  debug_extramutated_inputsoutputsrb   rN   higher_orderflex_attentionflex_attention_backwardrP   r   current_noder  r  r   MutationOutputr  )r   rZ  r  r  r  r  allowed_setr  r   rl  rr  s             rr   r  zTritonTemplateBuffer.__init__j  s    " 	);<&,&*V% 		&&55		&&>>K 77//66L+[5k])L>Z[+[[^..0FLLIWBEz&13= L &s   C>c                    | j                   S r|   )r  r   s    rr   rM  z TritonTemplateBuffer.get_outputs  r  rs   c                @    d| j                    d| j                   d}|S )NzTritonTemplateBuffer(layout=r  r   )rZ  r  )r   r   s     rr   rm  zTritonTemplateBuffer.__str__  s&    ,T[[MD<L<L;MQO
rs   r  )r  zOptional[Iterable[IRNode]]rU  rT  )r   r%  r&  r  rM  rm  r  r  s   @rr   r  r  i  s%     59" 3"Hrs   r  c                  X     e Zd ZdZ fdZd
dZddZd ZddZddZ	ddZ
dd	Z xZS )ChoiceCallera.  
    Represents a possible choice used in autotune_process.py.
    During autotuning, self.benchmark() is first called to get benchmark result,
    and if this choice is selected, self.output_node() is called to get the output_node.

    Children classes: TritonTemplateCaller, CUDATemplateCaller.
    c                L    t         |           || _        || _        || _        y r|   )rp  r  r   rZ  input_nodes)r   r   r  rZ  rr  s       rr   r  zChoiceCaller.__init__  s$    	&rs   c               T    | j                         }t        j                  ||d|i      S )Nr   )to_callabler>   	benchmark)r   r   r~   algos       rr   r  zChoiceCaller.benchmark  s)    !$$T4%>>rs   c                    t         r|   r   r   s    rr   	call_namezChoiceCaller.call_name  r   rs   c                    t         r|   r   r   s    rr   r  zChoiceCaller.to_callable  r   rs   c                    t         r|   r   r   s    rr   hash_keyzChoiceCaller.hash_key  r   rs   c                    t         r|   r   r   s    rr   output_nodezChoiceCaller.output_node  r   rs   c                    i S )zRInformation returned here is logged to the autotune log file when that is enabled.rw   r   s    rr   	info_dictzChoiceCaller.info_dict  s    	rs   c                     y)Nunsupported_choicerw   r   s    rr   autoheuristic_idzChoiceCaller.autoheuristic_id  s    #rs   )ru   r  rT  )ru   rX   )ru   z<Dict[str, Union[PrimitiveInfoType, List[PrimitiveInfoType]]])r   r%  r&  r,  r  r  r  r  r  r  r  r  r  r  s   @rr   r  r    s0    '?""""$rs   r  c                      e Zd ZddZy)TritonTemplateCallerBasec                    t         r|   r   r   s    rr   get_make_kernel_renderz/TritonTemplateCallerBase.get_make_kernel_render  r   rs   N)ru   r   )r   r%  r&  r  rw   rs   rr   r  r    s    "rs   r  c                  x     e Zd ZdZ	 	 	 	 	 	 d fdZedd       Zej                  d	d       Z	d	dZ
d
dZ xZS )MultiTemplateBufferaG  
    Represents a Buffer with multiple backing implementation choices.

    Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
    epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
    Otherwise, the fastest base choice will be chosen.
    c                T    t         |   ||d        || _        d | _        || _        y )N)rZ  r  r  )rp  r  _choice_timings_fn_choice_timingsoriginal_inputs)r   rZ  r  choice_timingsrr  s       rr   r  zMultiTemplateBuffer.__init__  s1     	v$O"0DH%rs   c                \    | j                   | j                         | _         | j                   S r|   )r  r  r   s    rr   r  z"MultiTemplateBuffer.choice_timings  s+    '#'#:#:#<D ###rs   c              #    K   t        |t        j                  j                  j                        sJ | j
                  |j
                  k(  sJ | j                  }|j                         | _        	 d  || _        y # || _        w xY wwr|   )r]   rb   rc   select_algorithmTritonTemplateCallerrZ  r  r  )r   callerrenders      rr   swap_as_triton_callerz)MultiTemplateBuffer.swap_as_triton_caller  sp     &%//"B"B"W"WXXX{{fmm+++(("("?"?"A	-&,D#fD#s   A-B0A< 4B<	BBc                J   t        |t        j                  j                  j                        sJ | j
                  j                  |j
                  j                  k(  sJ | j
                  j                  |j
                  j                  k(  sJ |j                         | _	        y r|   )
r]   rb   rc   r  r  rZ  r   r   r  r  )r   r  s     rr   finalize_as_triton_callerz-MultiTemplateBuffer.finalize_as_triton_caller  sw    &%//"B"B"W"WXXX{{6==#5#5555{{!!V]]%9%9999"("?"?"Ars   c                z    t        | j                  | j                  j                        }|| j                  |   fS )Nr5  )r  r  r  )r   
min_choices     rr   get_min_choicez"MultiTemplateBuffer.get_min_choice  s6    ,,$2E2E2I2IJ
D//
;<<rs   )rZ  r
  r  zList[IRNode]r  z'Callable[[], Dict[ChoiceCaller, float]])ru   zDict[ChoiceCaller, float])r  r  )ru   zTuple[ChoiceCaller, float])r   r%  r&  r,  r  r+  r  r)  r*  r  r  r  r  r  s   @rr   r  r    sc    	&	& 	& @		& $ $
 	- 	-B=rs   r  c                  .     e Zd Z	 	 	 	 d fdZd Z xZS )CUDATemplateBufferc                D    t         |   |||       || _        || _        y r|   )rp  r  workspace_sizetemplate)r   rZ  r  r  r  r  rr  s         rr   r  zCUDATemplateBuffer.__init__  s&     	);<, rs   c                6    | j                   | j                   S dS r  )r  r   s    rr   rS  z%CUDATemplateBuffer.get_workspace_size  s    &*&9&9&Et""L1Lrs   )r  rl   r  CUDATemplate)r   r%  r&  r  rS  r  r  s   @rr   r  r    s"    !
 ! !Mrs   r  c                       e Zd Z fdZ xZS )CppTemplateBufferc                D    t         |   |||       || _        || _        y r|   )rp  r  r  choice)r   rZ  r  r  r  r  rr  s         rr   r  zCppTemplateBuffer.__init__	  s#    );< rs   r   r%  r&  r  r  r  s   @rr   r  r    s     rs   r  c                  J    e Zd ZU ded<   d Zed        Zed        Zd Z	d Z
y)	r  rV  r  c                ~   t               }t        j                  | j                  D ]R  }t	        |t
              r|j                  fd|D               .|j                   |j                                      T t        fd| j                         D              }t        j                  ||t                     S )Nc              3  J   K   | ]  } |j                                 y wr|   r  )rG  r   rE  s     rr   rI  z/InputsKernel.get_read_writes.<locals>.<genexpr>  s     BqWQZZ\2B    #c              3  J   K   | ]  } |j                                 y wr|   r  )rG  rl  rE  s     rr   rI  z/InputsKernel.get_read_writes.<locals>.<genexpr>  s!      :
(+GCLLN#:
r  )rK  writesindex_exprs)r-   r4   rE  r  r]   r^   updater  r  rM  
ReadWrites)r   rK  inputr  rE  s       @rr   rA  zInputsKernel.get_read_writes  s    .8l&&[[ 	5E%&BEBB		'%.."234		5 0: :
/3/?/?/A:
 0
 &&"
 	
rs   c                   t        |t              r|j                  }t        |t              r|j                  }t        |t              r%t        |t
              st        j                  |      }t        |t              r| j                  |      S t        |t              r|S t        |t        t
        f      sJ |       |S r|   )r]   rX   r[  ru  r  rz  r  r  unwrap_storage_for_inputTorchBindObjectrv  r  r   s     rr   r  z%InputsKernel.unwrap_storage_for_input&  s    a#Aa$Aa":a+I**1-Aa#
 //22a)H!fo67::7rs   c                    g }| D ][  }t        |t              r#|D cg c]  }t        j                  |       }}nt        j                  |      }|j	                  |       ] |S c c}w r|   )r]   r^   r  r  re  )r  
inputs_newr   r   s       rr   r  zInputsKernel.unwrap_storage9  sj    
 	!A!T"GHI!\::1=II 99!<a 	! 	 Js   A%c                     yr  rw   r   s    rr   r<  zInputsKernel.is_externD  r  rs   c                     yr  rw   r   s    rr   r  zInputsKernel.num_readsG  r+  rs   N)r   r%  r&  r'  rA  r  r  r(  r  r<  r  rw   rs   rr   r  r    s?    
&  $  rs   r  c                      e Zd Zd Zy)	NopKernelc                     yr  rw   r   s    rr   r?  zNopKernel.is_no_opL  r  rs   N)r   r%  r&  r?  rw   rs   rr   r  r  K  s    rs   r  c                  F    e Zd ZdZed        Zed        Zed        Zd Zy)ConcatKernelzn
    There isn't actually a real kernel for concat, we just change the
    storage for the upstream data.
    c                   |d   j                         }|d   j                         }t        |d   j                               }dg}||   g}d|cxk  rt	        |      k  sJ  J t        dt	        |            D ]  }||   j                         }	|j                  ||          t	        |	      t	        |      k(  sJ ||   j                         |k(  sJ ||   j                         |k(  sJ t        t	        |            D ]I  }
|
|k(  r||
   |	|
   z   ||
<   t        j                  j                  j                  ||
   |	|
         ||
<   K |j                  ||           t        j                  |      }t        t	        |            D ]k  }||   }t        |      s|j                         }t        |t               s5t"        j%                  |j&                  |j(                        s`t+        |      } n t-        d |D              }t        j                  j.                  j0                  d   }t        |t              sJ |du rt-        d |D              rt+        |      }t3        d t!        ||||      g       }t5        |      }g }t        t	        |            D ]  }| j7                  ||   t8        j;                  ||||   ||   d            }|j<                  j                  |       t        ||   j>                  t@              r||   j>                  jC                         }n||   j>                  }|jE                         stG        ||   j                         jH                        stK        |      r|j                  |jM                                 t	        |      dkD  rMt        j                  jO                  |tP        jR                        rt        j                  jU                  |       t        j                  jW                  |      |_,        | j[                  |j<                        |_        t        j                  j]                  |       |S )	Nr   r2   c              3  2   K   | ]  }t        |        y wr|   )r   r<  s     rr   rI  z&ConcatKernel.create.<locals>.<genexpr>y  s     -W1.CA.F-Wrd  Fc              3     K   | ]p  }d |j                   v xr\ |j                   d    j                  t        j                        xs- |j                   d    j                  t        j                         r yw)r  r  N)r  rk  rb   r  r  )rG  args     rr   rI  z&ConcatKernel.create.<locals>.<genexpr>}  sq      <
  SXX --E<O<O-P W88E?00u?U?U0V<
s   A6A8)r   r   r   r   r   rZ  r  )rB  )/r   r   r^   r   r   r   re  rP   r   r   r  r   r   r   r   r]   r  r
  r}  r   r   r"   r  r  r~   r  ru  r  r9  r  r  r[  r  r  is_input_bufferrH   rn   rG   r9  rt  r5   FOREACHregister_operation_listr  r   r  r  )r  r  r	  r   r   r  offsets_startoffsets_endr   
input_sizer  output_strider   rZ  any_input_is_storage_and_layoutfx_node_argsconcat_kernelkernelop_namesinput_bufferinput_unwrappeds                        rr   r  zConcatKernel.createV  s   %%'q	##%q	**,-}oC'#h-'''''q#f+& 	.A++-J  #/z?c(m333!9&&(E111!9'')V3333x=) 8"*1+
1"=HQK"#''"2"2"?"? Z]#HQK	 x}-	. '99(Cs6{# 		Aq	A$Q'K88fmmT$B8$LM		 +.-WPV-W*W'ww++003,---*e3 <
 $<
 9
 ;8DM$$	 	
 M*s6{# 	CA++q	  Cq!1;q> ! L   ''5&)..(3"().."<"<">"()..  //16!9//1667"<0 ? ? AB'	C* x=1!4!4V^=S=S!TGG++H5WW44]C"11-2F2FG	""=1rs   c                    t        |t              r| j                  |j                        S t        |j                  j                  t
              xr t        |j                  t               S r|   )r]   rX   can_realize_into_without_copyr[  rZ  r   ExternKernelAlloc)r  r  s     rr   r  z*ConcatKernel.can_realize_into_without_copy  sU    c9%44SXX>>#((//>: 
:HH'D
 @
 	
rs   c                H   t        |t              s%t        |      rt        |      \  }}t        ||      }t        |t              sJ |       t        |t              r| j                  |j                  |      S t        |t              r_|j                          t        |j                  d      sJ | j                  |      r&t        |      |j                  _        |j                  S t        j                  |j                         |j!                         |j#                         t%        |j'                         |j'                               D cg c]/  \  }}t(        j*                  j,                  j/                  ||      1 c}}      }| j                  ||      S c c}}w )NrZ  r  )r]   rz  r   rf  rX   r  r[  ru  r  r4  r  r  rZ  r  r  r   r   r  r   r   rP   r   r   r  )r  r  r  r  rZ  r  r  pws           rr   r  zConcatKernel.realize_into  sF   
 #/$S)"7"<%gv6#/44/c9%##CHHc22c:&KKM388X...005"1#"6xx>>#--/__&  ?Aq   --a3	  
 C((s   4Fc                     yr  rw   r   s    rr   r  zConcatKernel.should_allocate  r  rs   N)	r   r%  r&  r,  r  r  r  r  r  rw   rs   rr   r  r  P  sK    
 X Xt 
 
 ) )>rs   r  c                      e Zd ZU dZded<    ej                  e      Zded<   dZ	ded	<   dZ
d
ed<   dZd
ed<    ej                  e      Zded<   dZded<   dZded<   dZded<    ej                  e      Zded<    ej                  e      Zded<   	 	 	 	 	 	 	 d9 fd	Zd:dZd;dZd Zd<dZd Zd Zd  Zd=d>d!Zd?d"Zd# Zed$        Ze 	 	 d@d%       Z!e d&        Z"e d'        Z#e d(        Z$e 	 	 	 dA	 	 	 dBd)       Z%e d<d*       Z&e d<d+       Z'e d,        Z(e d-        Z)e d.        Z*d/ Z+d=dCd0Z,d1 Z-d2 Z.d<d3Z/d4 Z0d5 Z1d6 Z2d;d7Z3dDd8Z4e4Z5 xZ6S )Er  rw   zTuple[Any, ...]constant_args)default_factoryzDict[str, Any]r   NzOptional[ReinterpretView]output_viewr  python_kernel_namecpp_kernel_namezIterable[str]ordered_kwargs_for_cpp_kernelzFOptional[Union[torch._ops.OpOverload, torch._ops.HigherOrderOperator]]op_overloadzOptional[List[Dict[str, Any]]]arg_propertiesz#Optional[Dict[str, Dict[str, Any]]]kwarg_propertiesz"Dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszList[MutationOutput]mutation_outputsc                4   t         |   |||       || _        |r|ni | _        || _        |
| _        | j                  |       | j                  |       |	| _        | j                          i | _
        g | _        t        j                  j                  | _        y r|   )rp  r  r  r   r  r  set_cpp_kernel_nameset_python_kernel_namer  collect_arg_kwarg_propertiesr  r   rP   r   r  fx_node)r   r   rZ  r  r  r   r  r  r  r  r  rr  s              rr   r  zExternKernel.__init__  s     		

 + &fB&&  1##$67-J*))+!# "ww++rs   c                     | g| j                   S r|   )r   r   s    rr   rM  zExternKernel.get_outputs  s    -t,,--rs   c                    t               S r|   r,   r   s    rr   rO  z%ExternKernel.get_unbacked_symbol_defs  rP  rs   c                   t        | j                  t        j                  j                        r\| j                  j
                  j                  D cg c]2  }|j                  s$|j                  |j                  |j                  d4 c}n+t        t        | j                              D cg c]  }i  c}| _        t        | j                  t        j                  j                        rP| j                  j
                  j                  D ci c]&  }|j                  |j                  |j                  d( c}ni | _        t        | j                  t        j                  j                        rX| j                   sK| j                  j
                  j                  D cg c]  }|j                  s|j                   c}| _        y y y c c}w c c}w c c}w c c}w )N)r   rn   r  )rn   r  )r]   r  rb   _ops
OpOverload_schema	arguments
kwarg_onlyr   	real_typer  r   r   r  r  allarg_propertiesr  )r   r   r   s      rr   r$  z)ExternKernel.collect_arg_kwarg_properties  sq    $**EJJ,A,AB ))11;; || FFKK%&__ $C$456"6 	$ $**EJJ,A,AB ))11;; qOO
  	 t'')>)>?66 !% 0 0 8 8 B B2all2D. 7 @/ 72s   7G+	G+GG/Gc                   t        |t        t        f      sJ t        |t              rt        |      }| j                  sJ d       t	        |      }t	        | j                        }||k  rqt
        j                  d| j                  ||z
         t        ||      D ]>  }| j                  |   d   }|j                  ||v r||   n| j                  |   d          @ |S )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.r   r  )
r]   r^   r_   r  r   r|  r}  r  r   re  )r   r~   r   convert_val_to_strn_args
n_pos_argsr   arg_names           rr   fill_non_provided_argsz#ExternKernel.fill_non_provided_args;  s     $u...dE":D""U$UU"T,,-
 JII^  V#	 6:. ..q1&96) 8$,,Q/@ rs   c                z    t        | j                  t              r!| j                          | j	                          y y r|   )r]   rZ  r   apply_constraintrw  r   s    rr   rg  zExternKernel.decide_layout]  s-    dkk>2!!#  3rs   c                J    t        | |      \  }}|r|j                  |       y y r|   )rF   	writeline)r   wrapper
origin_strdetailed_origin_strs       rr   codegen_commentzExternKernel.codegen_commentb  s*    *=dG*L'
'j) rs   c                    t         r|   r   r   r:  s     rr   codegenzExternKernel.codegeng  r   rs   c                   || _         d | _        d | _        d | _        t        j
                  j                  r.t        | j                  t        j                  j                        sy | j                  }| j                   |j                  dk(  rU|j                  dk(  r|j                  j                  d      d   n|j                  j!                  dd      }d| d| _         n|j"                  j$                  | _         dd	lm} |j"                  j*                  | _        | j                   j!                  d
d       d| j                   | _        	  ||      | _        y # t,        $ r
 d| _        Y y w xY w)Natenr  .r   r   z
at::_ops::z::callr2   get_cpp_op_schema::r   )r  cpp_kernel_overload_namecpp_kernel_keycpp_op_schemarP   r   cpp_wrapperr]   r  rb   r)  r*  	namespace_overloadnamer   r  replacer+  r   codegen.wrapperrE  overload_namer{  )r   r  r  opnamerE  s        rr   r"  z ExternKernel.set_cpp_kernel_namej  sJ   .(,%"!ww""*ejj33+
 !!'6) ++y8 OO))#.q100c: 
 *4F86'B$'-~~':':$ 	7(.(D(D%!%!5!5!=!=dC!H I4KhKhJij	$!26!:D 	$!#D	$s   E E0/E0c                   || _         |y | j                  }|y t        |t        j                  j
                        rd|j                   | _         y |j                  j                  dd       d|j                   | _         y )Nztorch.ops.higher_order.z._ops.z.ops.rC  )	r  r  r]   rb   r)  HigherOrderOperatorr   r%  rM  )r   r  r  s      rr   r#  z#ExternKernel.set_python_kernel_name  s    "4)!!>

 > >?(??P&QD# $$,,Xw?@&//ARS #rs   c                    t         j                  j                  rOt        j                  r3t         j                  j
                  j                  | j                        S | j                  S | j                  S r|   )	rP   r   rJ  r3   abi_compatibler(  get_c_shim_func_namer  r  r   s    rr   get_kernel_namezExternKernel.get_kernel_name  sa     ww"" (( $$99$:N:NO	
 ))		
 ((	
rs   c           	        t         j                  | j                         | j                         | j	                         | j                         | j                         | j                               }|j                          |S )N)r   r   r\  r^  r3  r   )	r  r  r   r   r  r   r5  r   r  )r   r  s     rr   
copy_inputzExternKernel.copy_input  sa    <<>++-]]_::<))+oo'  
 	

	rs   c                   ||d}t        j                  |      \  }g g }g }|D ]  }j                  t        |t                     d   r|j                  |       9t        |t
        j                        r5t        j                  j                  j                  j                  |d       }|j                  |        fd}	|D 
cg c]  }
| j                  |
       }}
|D ]  }
t        |
      st        |
d        g }|D ]	  }
t        |
t              se|
j!                         t        j                  j"                  v r;|j                  t        j                  j"                  |
j!                                   yt        |
t              se|
j!                         t        j                  j$                  v r;|j                  t        j                  j$                  |
j!                                   |j                  t'        |
d               |	||      \  }} ||i |}d }t        j(                  j                  x}rOt+        |t        j,                  |       t/        ||t        j,                  j0                  j3                  d            }t        |t4        t6        f      s|gn|}|D ]~  }t        |t8        j:                        s|j<                  s+d	}t        j                  j,                  j0                  j3                  d
d       x}r| d| }|t        j                  _         ||||	|fS c c}
w )N)r~   r   rV  )r  c                $   g }t        |       }t        |      }D ]9  }|r|j                  t        |              |j                  t        |             ; t        j                  |      }|j                  dg       |j                  di       fS )Nr~   r   )iterre  nextpytreetree_unflattenr  )	new_tensor_argsnew_non_tensor_argsr"  
it_tensorsit_non_tensors	is_tensorr  	args_specis_arg_tensors	          rr   unflatten_argsz3ExternKernel.process_kernel.<locals>.unflatten_args  s    Fo.J!"56N* 8	MM$z"23MM$~"67	8
 %%fi8A55$aeeHb&999rs   Trd  )r   r  zEsparsity not handled. Please file issue for sparse inference weights.stack_tracez Found from : 
 ) r]  tree_flattenre  r]   rY   rh   r   rP   r   r   r   create_symintnoder  r   rf  r  r  	constantstorchbind_constantsr   	fake_moder)   r  r&   r  r  r^   r_   rb   Tensor	is_sparsedisable_cudagraphs_reason)r  r  r~   r   binded_args	args_flattensor_argsnon_tensor_argsr  rf  r   example_argsnew_args
new_kwargsexample_outputr  r   example_out_lir   msgrg  rd  re  s                        @@rr   process_kernelzExternKernel.process_kernel  s     $v6%22;?	9%' 	,C  C!89R ""3'c5::.''**44FFsQUFVC&&s+	,
	: 6AAs((+AA  	6A$Q'%a5	6 JL
  	LA a*qzz|qww?P?P/P##AGG$5$5ajjl$CDq(+JJLAGG$?$??##AGG$?$?

$MN##$5aT$JK	L  .lOL*8Z8JN---9-Iq~~~F 9>1>>+>+>+B+B5+I! ntUm<  	
   	8A!U\\*q{{]"#''"6"6";";"?"?t"TT;T E!2;-@C471	8 
 	
g Bs   L>c           
        t        |t              sJ t        |t              r|S |j                         }t        j
                  j                  |j                               }|J |j                         }|d|j                  v rt        |j                  t              r|j                  d   j                  t        j                        s-|j                  d   j                  t        j                        r)|j!                  t#        |j%                                      n|j'                          t)        j*                  |j%                         d      \  }}|d   } |j-                         |      }t        j
                  j.                  j1                  ||      }t        j
                  j.                  j3                  ||      }	t        j
                  j.                  j5                  ||      }
t7        ||	      |
z   }||k7  rt8        j;                  d|	|
|       t<        t        |j>                  tA        |jC                         |jE                         |j%                         |	|
            S )	z
        In order to pass this to an extern kernel we need a
        ReinterpretView not a View.  This allows us to avoid some
        unneeded copies.
        r  r  r  rV   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%sr  )r[  rZ  )#r]   r  rz  r  rP   r   r  r  r5  r  rZ  r   rk  rb   r  r  r	  r"   r   rw  r4   r~  r  r   r  stride_vars
offset_varrI   r|  r}  r   r[  r  r   r   )r  r   x_unwrap_viewrl  x_unwrap_view_fx_node
index_argsrS  ra  r   r  r  expecteds               rr   convert_to_reinterpret_viewz(ExternKernel.convert_to_reinterpret_view  s    !X&&&a)H gg  !7!7!9: # 3 3 5 "-.333=//@%**51??"'"5"5 @  )--e4BB"'"8"8 C 
 77.}/E/E/GH '')!-!@!@JJL"

J  ]
  ,  55eZH''""..ujA!!,,UJ?Z1F:HIIR	 &%||~kkmZZ\	
 		
rs   c                   |
t               S t        |t        j                  t        j                  j
                  j                  t        f      rt        |      S t        |t              r[t        j                  j                  t        j                  |j                  |j!                         |j#                                     S t        |t$              r|S t        |t&              r| j)                  |j*                        S t        |t,              r3t-        | j)                  |j*                        |j/                               S t        |t0              r;|j3                          t5        |j7                               r	 | j9                  |      S t        |t<              r|j3                          |S t        |t>              r|S | jA                  |      S # t:        $ r Y Pw xY w)N)r   r   )!r+  r]   rh   r   ri   rj   rk   rl   r0  r\  rP   r   add_tensor_constantrb   tensorr   r   r   r  rX   r  r[  rz  r   r  r  r   r  r  r   ru  r  rX  r  s     rr   r  zExternKernel.realize_input\  s`   9'))a%**ekk&9&9&A&A3GH(++a"77..QWWAKKM!,,.Q  a(Ha#$$QVV,,a)"3#4#4QVV#<allnMMa"IIK$Q]]_5::1== a$IIKHa)H~~a   + s    G 	G"!G"c                    t        |      r<t        |j                               dk(  r|S |j                         D ]  }|dk(  s	|c S  | j                  |      S r  )r   r   r  rX  )r  r   r   s      rr   require_stride1zExternKernel.require_stride1{  sT     #1<<>"a',,. Q;H ~~a  rs   c                   ||J |j                         dk(  r|S t        |      r]t        |j                         t              r9|j                         j
                  }t        |j                         t              r9t        |j                         t              r}|rht        |ddt        ||      rJt        t        j                  j                  j                  |j                         j                              n||       |S t        |ddd ||       |S t        |j                         t              rX|r|j                         j!                  |      s5|r5t#        ||j                         j                  |j%                               r|S t        |j                         t&              rt        |j                         j)                         t              rt+        d      t        |j                         j)                         t              rt|r-|j                         j)                         j!                  |      sC|rCt#        ||j                         j)                         j                  |j%                               r|S t        |t,              rX|r|j                         j!                  |      s5|r5t#        ||j                         j                  |j%                               r|S t        |t.              rt        |j0                  t2              rt        |j0                  t4              st        |j7                               rvt        |j7                         j0                  t8              sN	 | j;                  |j0                        |_        |r| j=                  |||      S |r| j?                  |||      S 	 | jC                  |      }t        |dd|||       |rt        ||      sJ |S # t@        $ r Y ?w xY w)Nr   TF)re  rp  rq  rr  ro  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutrt  )"r	  r   r]   r   r  r  r   rf  r}  r   rP   r   r   
size_hintsr   r  r|  r  r   r  r  r  r#  rX   r[  r  rz  r  r  r  require_stride_orderrequire_exact_stridesr   rX  )r  r   r   rs  rr  s        rr   require_strideszExternKernel.require_strides  sF     M$===;;=AH #Q\\^_=LLN'' Q\\^_=!,,..9 *#(- >aG &6GG,,778M8MN& #&3
 H *#(-%)&3&3 HALLNK81<<>;;EB!1%q||~'<'<ajjl
 ALLN,FGalln88:NK(b    : : <kJq||~99;MMeT%5)LLN668??JJL H a%q||~77>-!1<<>#8#8!**,
 Hq)$1668,qvv7%ammo6q}}335FG88@335 4   #44= 5   # NN1!''	
 5a??? ' s   35O4 )O4 4	P ?P c                *    | j                  |||      S )N)rs  rr  r  )r  r   rs  rr  s       rr   r  z"ExternKernel.require_exact_strides   s!    ""]- # 
 	
rs   c                *    | j                  |||      S )N)r   rr  r  )r  r   r   rr  s       rr   r  z!ExternKernel.require_stride_order  s    ""1E"OOrs   c                .    | j                  |t              S r|   )r  r  r  s     rr   require_channels_lastz"ExternKernel.require_channels_last
  s    ''+<==rs   c                .    | j                  |t              S r|   )r  r  r  s     rr   require_channels_last_3dz%ExternKernel.require_channels_last_3d  s    ''+=>>rs   c                    | j                  |t        t        t        t	        |j                                                       S r|   )r  r^   r  r   r   r   r  s     rr   require_contiguouszExternKernel.require_contiguous  s/    ''4s1::<?P9Q0R+STTrs   c                     y r|   rw   r   s    rr   r7  zExternKernel.apply_constraint  r%  rs   c                8   t         j                  j                  rCg }d }|r]| j                  rQt	        | j
                        t	        |      k(  sJ d       | j                  D ci c]  }|j                  d      | }}t        | j
                        D ]  \  }}|*|j                  ||         }|r|j                  d      nd }n\t	        | j                        |z   }	| j                  r6|	t	        | j                        k  r| j                  |	   j                  d      nd }|j                  t         j                  j                  j                  ||              |S t        t         j                  j                  j                  | j
                        S c c}w )NzDnames passed to codegen_const_args does not match self.constant_argsr   rn   )rP   r   rJ  r  r   r  r  r   r  re  r(  val_to_arg_strr   )
r   rl  r"  name_to_arg_propertiesr  r   r   proptype_r   s
             rr   codegen_const_argszExternKernel.codegen_const_args  s~   77F
 &*",,4--.#3  ZYZ  594G4G*-0CGGFOS(*& * "$"4"45 1)5155eAh?D04DHHV,$Edkk*Q.C  ..3T=P=P9Q3Q ++C044V<! 
 GG((775A Mqww++::D<N<NOO)*s   $Fc                   g }t        | j                        D ]  \  }}t        |t              rD|D cg c]  }|j	                          }}ddj                  |       d}|j                  |       [t        j                  j                  r| j                  r|t        | j                        k  sJ d       | j                  |   j                  d      }|j                  t        j                  j                  j                  ||             |j                  |j	                                 |j                  | j!                                |S c c}w )N[r  ]z-Invalid access to ExternKernel.arg_propertiesrn   )r   r  r]   r^   r  r   re  rP   r   rJ  r  r   r  r(  r  rk  r  )r   r~   r   r   rl  r  r  s          rr   codegen_argszExternKernel.codegen_args;  s.   dkk* 	7DAq!T"89:1,,.::&'		%(8'9$;!-.77&&..1s++8 4 GFG  !//266v>EKK,,;;u KK 3 3 56#	7$ 	D++-.# ;s   Ec                   || j                   v r| j                   j                  |      S | j                  rE| j                  j                  |      r*| j                  j                  |      j                  d      S t        | d      )Nr  z not in self.allarg_properties)r   r  r/  r  )r   r4  s     rr   get_kwargs_valuezExternKernel.get_kwargs_valueR  sr    t{{";;??8,,!!d&<&<&@&@&J))--h7;;OLL H:-K!LMMrs   c           	        t         j                  j                  rg }| j                  D ]  }|r|dk(  r| j	                  |      }t        |t        j                        r|j                  |       H| j                  r8|| j                  v r*| j                  j                  |      j                  d      nd }|j                  t         j                  j                  j                  ||              |S | j                  j                         D cg c]3  \  }}| dt         j                  j                  j                  |       5 }}}|S c c}}w )Nr   rn   ri  )rP   r   rJ  r  r  r]   rh   r   re  r/  r  r(  r  r   rR  )r   skip_outr   r4  rK  r  ks          rr   codegen_kwargszExternKernel.codegen_kwargsZ  s0   77F >> E 1))(3a,MM!$  11h$BXBX6X ..228<@@H! 
 MM,,;;u0  !KK--/Aq #Qqww++::1=>?F  	s   8Ec           	        t         j                  rt        j                  j                  st        | j                               dk(  ry t        j                  j                  j                  | j                               }t        j                  j                  j                  | j                               }|j                  d| j                          d| d| d       y y y )Nr   zassert_size_stride(r  r   )r3   size_assertsrP   r   rJ  rL   r   r(  codegen_shape_tupler  r9  r  )r   r:  r   r   s       rr   codegen_size_assertsz!ExternKernel.codegen_size_assertsw  s    qww':':T]]_-277'';;DMMOLDWW))==doo>OPF%dmmo%6bbJ (;rs   c                N    | j                         }| j                         }|g g|fS )zD
        get output sizes and strides, for template_codegen
        )r   r  )r   _sizerv  s      rr   get_group_stridezExternKernel.get_group_stride  s*     //#r{G##rs   c                   t         j                  j                  }| j                         }| j	                         }|D cg c]  }|j                  |       }}t        t        |            D cg c]  }t        d|        }}t        t        t        |            |j                  d      }t        |      D 	ci c]  \  }}	|	|
 }
}}	t        t        |
            D cg c]  }|
|   	 }}|D cg c]  }||   	 }}| j                         } ||      }t         j                  j                  j                  |||g      \  }}}t        d      \  }}t        t!        | ||D cg c]
  } ||       c}                  }t#        t%        j&                  |      |      }|t)        |      fS c c}w c c}w c c}	}w c c}w c c}w c c}w )zC
        Manually get canonicalization of the output index
        dT)r6  r  c)rP   r   r   r   r  r   r   r   rJ   rc  r  r   r  r]  r:   r`   r   rM   rh   r  r_   )r   r   r  r  r   r   rJ  index_orderr   r   r   r   r  r   	new_sizesr   rb  r   add_varreplacements                       rr   canonicalizezExternKernel.canonicalize  s   
 77##//#29:Q8%%a(::;@U;LMa(1QC1M
MU3w<0g6I6ISWX+4[+ABxsC#s(BB$)#f+$67q77-23jm3
3##%
#$%GG$4$4$D$Dw%
!	7E !%
73z7	3R1GAJ3R+STU5<<.<eI&&&+ ;M C73 4Ss#   F5/F:>F?$G6G
+Gc                    t               }| j                  D ]  }|t        |      z  } | j                  j	                         D ]  }|t        |      z  } |S r|   )r-   r  maybe_free_unbacked_symbolsr   ra   )r   r  r  s      rr   r"  z%ExternKernel.get_unbacked_symbol_uses  sa     '1l%% 	2C,S11A	2;;%%' 	2C,S11A	2rs   c           
     "   t        | dd       }d|g}|t        j                  |       D cg c]'  }|j                   dt        | |j                         ) c}z  }|j	                  d| j
                         | j                  |      S c c}w )Nr  zpython_kernel_name=ri  rj  )r}   dataclassesfieldsr   re  r3  r   )r   kernel_namer   fields       rr   rm  zExternKernel.__str__  s    d$8$?!+1
 	$++D1
 zzl!GD%**567
 	
 	|D$4$4#789u%%
s   ,Brw   NNNNrw   NrU  rW  r  r|   r  r  )r  r  )ru   ziTuple[Any, List[Any], List[Any], Callable[[Any, Any], Any], Optional[Dict[sympy.Symbol, pytree.KeyPath]]])NNF)r   zOptional[Sequence[int]]rs  zOptional[Sequence[_IntLike]])rl  zOptional[List[str]]rT  )7r   r%  r&  r  r'  r  r  r`   r   r  r  r  r^   r  r  r  r  r  r   r  rM  rO  r$  r5  rg  r=  r@  r"  r#  rV  r(  rX  r  rz  r  r  r  r  r  r  r  r  r  r7  r  r  r  r  r  r  r  r"  rm  r  r  r  s   @rr   r  r    sl   %'M?'.[..tDFND-1K*1(,,%)O]) 4E;3D3D4!= 
 	    6:N29<@9@<MK<M<M=9  .?[->->t-T*T &(,<. D D!
*
"$$L	
 
 
 ^

^
 ^
@ A
 A
F ! !< ! !  *.6:x 'x 4	x xt 
 

 P P > > ? ? U U PD.N:	$'>
& Hrs   r  c                  :     e Zd Zd Z	 	 	 	 	 	 	 d fd	Zd Z xZS )ExternKernelOutc                   | j                  |       g | j                         | j                  d      }| j                         }t        j
                  j                  r$| j                  dk(  rt        j                  rdnd}n| j                         }|j                  || j                         | j                  r | j                  j                         |       y d |       y )NT)r  ztorch::inductor::_mm_plus_mmaoti_torch__mm_plus_mm_outz torch::inductor::_mm_plus_mm_out)r=  r  r  rV  rP   r   rJ  r  r3   rT  generate_extern_kernel_outr  r  )r   r:  r~   r  s       rr   r@  zExternKernelOut.codegen  s    W%J""$Jt':':D':'IJ**,GG$$(FF
 (( -7  ..0K**""$484D4DD..0		
 KO		
rs   c
                    t         
|   d || j                  |      ||xs i d ||||	
       t        j                  j                  |       | _        t        j                  j                  |        y r|   rp  r  r  rP   r   r  r   r  )r   rZ  r  r  r   r  r  r  r  r  rr  s             rr   r  zExternKernelOut.__init__  si     	'Lb)	
 GG++D1		""4(rs   c                     yr  rw   r   s    rr   r  zExternKernelOut.should_allocate  r  rs   r  )r   r%  r&  r@  r  r  r  r  s   @rr   r  r    s)    
6 &()6rs   r  c                        e Zd Zd fdZ xZS )RandomSeedsc                0   t        j                  t         j                        }t        |   t        |t         j                  |g      g |j                  |j                  |ggdt        j                  rdndt        j                  j                         y )NrX  zaten.randint.low_outzat::_ops::randint_low_out::callzat::randint_out)rZ  r  r  r  r  r  )rb   r  r  rp  r  r  r  r  r3   rT  rB  randintlow_out)r   countr   limitsrr  s       rr   r  zRandomSeeds.__init__  sz    U[[)kkW
 !::vzzE7;5
 $$ >",, 	 	
rs   )r  rl   r   rZ  r  r  s   @rr   r  r    s    
 
rs   r  c                  >     e Zd Zd Z	 	 	 	 	 	 d fd	Zd Zd Z xZS )r  c                   | j                  |       g | j                         | j                         }t        j                  j
                  j                  | |       t        | j                  t              r| j                  |       y y r|   )r=  r  r  rP   r   r(  generate_extern_kernel_allocr]   rZ  r
  r  r   r:  r~   s      rr   r@  zExternKernelAlloc.codegen  sl    W%=""$=t':':'<=	99$Edkk6*%%g. +rs   c	                    t         	|   d || j                  |      ||xs i d ||||
       t        j                  j                  |       | _        t        j                  j                  |        y r|   r  )
r   rZ  r  r  r   r  r  r  r  rr  s
            rr   r  zExternKernelAlloc.__init__  si     	'Lb)	
 GG++D1		""4(rs   c                     yr;  rw   r   s    rr   r  z!ExternKernelAlloc.should_allocate3  r=  rs   c                    t         r|   r   r   s    rr   r7  z"ExternKernelAlloc.apply_constraint6  r   rs   )rw   NNNrw   N)r   r%  r&  r@  r  r  r7  r  r  s   @rr   r  r    s+    / &()4"rs   r  c                  8     e Zd ZdZd fdZddZd Zd Z xZS )r  zP
    An output buffer that represents the mutation of a pre-existing buffer
    c                    t         |   d |       |j                         }t        j                  j                  |       |g| _        || _        t        j                  j                  |       | _	        y r  )
rp  r  r  rP   r   r  mutation_namesmutating_noder  r   )r   rZ  mutated_noder  mutated_node_namerr  s        rr   r  zMutationOutput.__init__?  s`    d62(113	##$5601(5GG++D1	rs   c                    | j                   S r|   )r  r   s    rr   r   zMutationOutput.get_defining_opG  s    !!!rs   c                    | j                   S r|   )r  r   s    rr   r  z!MutationOutput.get_mutation_namesJ  r  rs   c                     yr;  rw   r   s    rr   r  zMutationOutput.should_allocateM  r=  rs   )r  r-  r!  )	r   r%  r&  r,  r  r   r  r  r  r  s   @rr   r  r  :  s    2"#rs   r  c                  N     e Zd Zd Zd Zd fdZddZ fdZd	dZd
dZ	 xZ
S )UserDefinedTritonKernelc                    ddl m} ddlm} |j	                  | j
                        }g }t        ||      r|j                  }|j                  }||fS )Nr   )	Autotuner)kernel_side_table)	triton.runtime.autotunerr  *torch._higher_order_ops.triton_kernel_wrapr  
get_kernel
kernel_idxr]   configsr   )r   r  r  r  r  s        rr   get_kernel_and_configsz.UserDefinedTritonKernel.get_kernel_and_configsR  sG    6P"--doo>fi(nnGYYFwrs   c                   | j                         \  }}|j                  ||| j                        \  }}| j                  D cg c]  }| j	                  |       }}g }t        | j                        D ]>  \  }	}
|j                  j                  |
      |j                  v s.|j                  |	       @ | j                  |       |j                  ||| j                  |||       y c c}w r|   )r  !define_user_defined_triton_kernelr   r  r  r   	arg_namesr   
constexprsre  r=  #generate_user_defined_triton_kernelgrid)r   r:  r  r  new_nametriton_metar  raw_argsconstexpr_indicesr   kwargs              rr   r@  zUserDefinedTritonKernel.codegen^  s    557 !( I IGT[[!
+ /3.P.P
)*D!!!$
 
 #D$F$FG 	.JC%%e,0A0AA!((-	.
 	W%33h		7KAR	

s   C&c                L    t         |          t        | j                        z  S r|   )rp  r"  r(   r  rq  s    rr   r"  z0UserDefinedTritonKernel.get_unbacked_symbol_usesw  s"     w/14I$))4TTTrs   c                    t               S r|   r,   r   s    rr   rO  z0UserDefinedTritonKernel.get_unbacked_symbol_defs|  rP  rs   c                  g }i }g }|j                         D ]f  \  }}t        |t              r;t        j	                  | j                  |            }	|j                  |	       |	||<   Q|j                  |       |||<   h t        |      dk7  sJ |d   j                         | _	        t        | -  d t        | j                        |t        |      |       || _        || _        | j!                         \  }
}|
j"                  D cg c]	  }||v s| c}| _        ddlm} t        |      dkD  r|d   j*                  ni } ||
i ||      D cg c]  }||   	 c}| _        | j,                  D cg c]"  }t/        t        | j                        ||       $ c}| _        t2        j4                  j7                  |        y c c}w c c}w c c}w )Nr   )identify_mutated_tensors)rR  r]   rX   r  r  r  re  r   r   r   rp  r  r  r_   r  r  r  r  r  r  r  r   mutable_argsr  r   rP   r   r  )r   r  r  kernel_argsr  r   r  r  rK  r   r  r  r  r  autotuned_kwargsr6  rl  rr  s                    rr   r  z UserDefinedTritonKernel.__init__  s   %%' 	DAq!Y' 99$:L:LQ:OPa q	$$Q'q		 6{aQi**,t{{#- 	
 %	557 "++.
sk/AC.
* 	X03Gq0@71:,,b 0;;;*:;
 
 ((!
 :dkk2C>!
 	
""4(%.

!
s   	GGG3'Gc                ,    t        | j                        S r|   )r^   r   r   s    rr   rM  z#UserDefinedTritonKernel.get_outputs  s    D))**rs   c                    | j                   S r|   ru  r   s    rr   r   z"UserDefinedTritonKernel.get_device  rv  rs   rW  rU  )ru   rZ  )r   r%  r&  r  r@  r"  rO  r  rM  r   r  r  s   @rr   r  r  Q  s(    

2U
.)`+rs   r  c                  <     e Zd ZdZd Zd Zd ZddZ fdZ xZ	S )InplaceBernoulliFallbackE
    This needs to be a custom class to handle mutation properly
    c                   d | j                   D        \  }t        j                  j                  rlt        j
                  r\|j                  | j                          d| ddj                  t        t        | j                               d|j                          y |j                  | j                          d| ddj                  t        t        | j                               d|j                          y )Nc              3  <   K   | ]  }|j                           y wr|   r  rG  r   s     rr   rI  z3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s     ;!##%;rk  r   r  z, NULL)r   )r  rP   r   rJ  r3   rT  r9  rV  r   r   reprr  ending)r   r:  r   s      rr   r@  z InplaceBernoulliFallback.codegen  s    ;t{{;776#8#8 '')*!A3b3tTEWEW;X1Y0ZZabibpbpaqr '')*!A3b3tTEWEW;X1Y0ZZ[\c\j\j[klrs   c                     yr;  rw   r   s    rr   r  z(InplaceBernoulliFallback.should_allocate  r=  rs   c                >    | j                   d   j                         gS r  r  r  r   s    rr   r  z+InplaceBernoulliFallback.get_mutation_names      A'')**rs   c                    t               S r|   r,   r   s    rr   rO  z1InplaceBernoulliFallback.get_unbacked_symbol_defs  rP  rs   c                   t         |   d t        |j                               | j	                  |g      ||       t
        j                  j                  |j                                t
        j                  j                  |       | _
        t
        j                  j                  |        t        j                  sd| _        y y )Nr  zat::native::bernoulli_)rp  r  r  r   r  rP   r   r  r  r  r   r  r3   rT  r  )r   r  r   r  rr  s       rr   r  z!InplaceBernoulliFallback.__init__  s    q||~&$# 	 	
 	
##AJJL1GG++D1		""4($$#;D  %rs   rW  
r   r%  r&  r,  r@  r  r  rO  r  r  r  s   @rr   r  r    s&    +< <rs   r  c                  P     e Zd ZdZd Zd Zd ZddZ fdZe	d	d
d       Z
 xZS )InplaceCopyFallbackr  c                P    | j                         \  }}}|j                  ||       y r|   )r  codegen_device_copy)r   r:  r  r  non_blockings        rr   r@  zInplaceCopyFallback.codegen  s'    #'#4#4#6 c<##C-rs   c                     yr;  rw   r   s    rr   r  z#InplaceCopyFallback.should_allocate  r=  rs   c                >    | j                   d   j                         gS r  r  r   s    rr   r  z&InplaceCopyFallback.get_mutation_names  r	  rs   c                    t               S r|   r,   r   s    rr   rO  z,InplaceCopyFallback.get_unbacked_symbol_defs  rP  rs   c                :   t         |   d |||dt        j                  rdnd       t        j
                  j                  |d   j                                t        j
                  j                  |       | _	        t        j
                  j                  |        y )Nz
aten.copy_aoti_torch_copy_zat::_ops::copy_::call)r  r  r   )rp  r  r3   rT  rP   r   r  r  r  r   r  )r   rZ  r  r  rr  s       rr   r  zInplaceCopyFallback.__init__  s~     	+&,&;&;"AX 	 		
 	
##F1I$6$6$89GG++D1		""4(rs   c                    ||fD cg c]  }| j                  |       }}|f}t        t        |j                               ||      }|S c c}w r|   )r  r  r  r   )r  r  r  r  r   r  r  r"  s           rr   r  zInplaceCopyFallback.create	  sV    14c
;1###A&;;%$s~~'(

  <s   A
rW  r  )r  r   )r   r%  r&  r,  r@  r  r  rO  r  r  r  r  r  s   @rr   r  r    s5    .+)(  rs   r  c                  0    e Zd ZdZd Zd Zd ZddZd Zy)	MutatingFirstArgExternKernelr  c                    g d | j                   D        t        t        | j                        }|j	                  | j                          ddj                  |       d|j                          y )Nc              3  <   K   | ]  }|j                           y wr|   r  r  s     rr   rI  z7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>  s     9a!!#9rk  r   r  r   )r  r   r  r  r9  rV  r   r  )r   r:  argrefss      rr   r@  z$MutatingFirstArgExternKernel.codegen  sl    
9T[[9
t))*
 	##%&a		'(:';1W^^<LM	
rs   c                     yr;  rw   r   s    rr   r  z,MutatingFirstArgExternKernel.should_allocate#  r=  rs   c                >    | j                   d   j                         gS r  r  r   s    rr   r  z/MutatingFirstArgExternKernel.get_mutation_names&  r	  rs   c                    t               S r|   r,   r   s    rr   rO  z5MutatingFirstArgExternKernel.get_unbacked_symbol_defs)  rP  rs   c                     yr  rw   r   s    rr   has_side_effectsz-MutatingFirstArgExternKernel.has_side_effects,  r  rs   NrW  )	r   r%  r&  r,  r@  r  r  rO  r"  rw   rs   rr   r  r    s     
+rs   r  c                       e Zd Z fdZ xZS )ResizeStorageBytesc                (   t        |t              sJ d       t        |   d t	        |j                               | j                  |g      |f       t        j                  j                  |j                                t        j                  j                  |       | _        t        j                  j                  |        d| _        d| _        t        j                  j                   j#                  |j$                  j                                y )NzTODO: dynamic shapes)r  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)r]   rl   rp  r  r  r   r  rP   r   r  r  r  r   r  r  r  never_reuse_buffersr  r[  )r   variabler  rr  s      rr   r  zResizeStorageBytes.__init__1  s    (C(@*@@(x**,-
+#+	 	 	
 	
##H$5$5$78GG++D1		""4("FG	##''(>(>(@Ars   r  r  s   @rr   r$  r$  0  s    B Brs   r$  c                  $     e Zd Z fdZd Z xZS )SetSourceTensorKernelc                   |j                          t        | 	  |j                         ||gdt        j
                  j                  j                  j                         t        j                  j                  j                  |j                  j                                t        j                  j                  j                  |j                                t        j                  j                  j                  | j                                |j                         }t!        t#        |      ||       t!        t#        |      ||       g| _        y )Nz!torch.ops.aten.set_.source_Tensor)r  r  )rw  rp  r  r   rb   rN   rB  set_source_TensorrP   r   r&  r  r[  r  r   r  r  r   )r   self_tensorstorage_tensorr   rr  s       rr   r  zSetSourceTensorKernel.__init__B  s    !!#""$.)B		++99	 	 	
 	
##''(8(8(A(A(CD	##''(?(?(AB	##''8**,:f-{DA:f-~tD!
rs   c                v    | j                   d   j                         | j                   d   j                         gS r  r  r   s    rr   r  z2SetSourceTensorKernel.get_inputs_that_alias_outputS  s/    A'')4;;q>+B+B+DEErs   )r   r%  r&  r  r  r  r  s   @rr   r)  r)  A  s    
"Frs   r)  c                  P     e Zd ZdZd Zd Zd Zd
dZddd	 	 	 	 	 d fd	Z xZ	S )ScatterFallbackz
    This needs to be a custom class to handle mutation properly.
    This class handles both aten.scatter_ and aten.scatter_reduce_.
    It also handle the case `src` being a scalar properly.
    c           
        | j                   d   }t        j                  j                  rddd}||v r||   }| j                  rd | j
                  D        \  }}}n%d | j
                  D        \  }}| j                  d   }|j                  ||| j                  d   ||g| j                  | j                  | j                  || j                                y )	Nr  r  r  )r  multiplyc              3  <   K   | ]  }|j                           y wr|   r  r  s     rr   rI  z*ScatterFallback.codegen.<locals>.<genexpr>g  s     Jq224Jrk  c              3  <   K   | ]  }|j                           y wr|   r  r  s     rr   rI  z*ScatterFallback.codegen.<locals>.<genexpr>i  s     EA!--/Erk  r2   r   )r   rP   r   rJ  src_is_tensorr  r  generate_scatter_fallbackr  r  r  )r   r:  r  get_operator_enumr   r   r  s          rr   r@  zScatterFallback.codegen^  s    X&77(-6 B***62JdkkJOQsEEJQ$$Q'C))""1%uc2  ##!	
rs   c                     yr;  rw   r   s    rr   r  zScatterFallback.should_allocateu  r=  rs   c                >    | j                   d   j                         gS r  r  r   s    rr   r  z"ScatterFallback.get_mutation_namesx  r	  rs   c                    t               S r|   r,   r   s    rr   rO  z(ScatterFallback.get_unbacked_symbol_defs{  rP  rs   NTr  include_selfc          
     d   t        |t              | _        | j                  r%|||fD cg c]  }| j                  |       }	}|f}
n$||fD cg c]  }| j                  |       }	}||f}
t        |   d t        |j                               | j                  |	      |
||dt        |      ddg|       t        j                  j                  |j                                t        j                  j                  |       | _        t        j                  j!                  |        y c c}w c c}w )Nr<  r  r=  )r  r  r  )r]   rX   r6  r  rp  r  r  r   r  r   rP   r   r  r  r  r   r  )r   r  r   r	  r   r  r  r=  r   tensorsr  rr  s              rr   r  zScatterFallback.__init__~  s    (Y7 78%oFt))!,FGF FM78%jAt))!,AGA #JMq||~&(|<";/+3^*D# 	 		
 	
##AJJL1GG++D1		""4(% G Bs   D(D-rW  )r	  rl   r  r  r=  r   r  r  s   @rr   r1  r1  W  sL    
.+ !%!!) 	!) !) !) !)rs   r1  c                  <     e Zd ZdZd Zd Zd ZddZ fdZ xZ	S )IndexPutFallbackzQ
    This needs to be a custom class to handle mutation and indices properly
    c                   d | j                   D        ^}}}g }t        |      }t        | j                        D ]b  \  }}| j                  |   |j	                  t        |             0|j	                  t        j                  j                  j                         d  |j                  | j                         |||g| j                           y )Nc              3  <   K   | ]  }|j                           y wr|   r  r  s     rr   rI  z+IndexPutFallback.codegen.<locals>.<genexpr>  s     &Rq':':'<&Rrk  )r  r[  r   rj  re  r\  rP   r   r(  r.  generate_index_put_fallbackrV  r  )	r   r:  r   ra   valid_indicesrj  iter_valid_indicesr   r   s	            rr   r@  zIndexPutFallback.codegen  s    &Rdkk&R#F]!-0dll+ 	>DAq||A*t$678qww33<<=		> 	,++  "Aw	
9=9P9P9R	
rs   c                     yr;  rw   r   s    rr   r  z IndexPutFallback.should_allocate  r=  rs   c                >    | j                   d   j                         gS r  r  r   s    rr   r  z#IndexPutFallback.get_mutation_names  r	  rs   c                    t               S r|   r,   r   s    rr   rO  z)IndexPutFallback.get_unbacked_symbol_defs  rP  rs   c           	     ,   || _         |D cg c]  }||	 }}||g|D cg c]  }| j                  |       }}t        j                  rdnd}	t        
|   d t        j                               | j                  |      |fd|	|       t        j                  j                  | j                  d   j                                t        j                  j                  |       | _        t        j                  j!                  |        y c c}w c c}w )Naoti_torch_index_put_outzat::index_put_outzaten.index_put_)r  r  r  r   )rj  r  r3   rT  rp  r  r  r   r  rP   r   r  r  r  r  r   r  )r   r  r   rj  ra   
accumulater   rE  r?  r  rr  s             rr   r  zIndexPutFallback.__init__  s    $+=qq}==34f2M}2MNQ4%%a(NN*0*?*?&EX 	 	q||~&(M0+# 	 	
 	
##DKKN$;$;$=>GG++D1		""4(! >Ns   DDDrW  r  r  s   @rr   rA  rA    s&    
+) )rs   rA  c                  "    e Zd Zed        Zd Zy)
DeviceCopyc                   |j                         sKt        d |j                         D              r+t        j                  j
                  s|j                  |      S t        j                  j                  |       t        j                  j                  |j                                t        d       t        t        ||j                         |j                               | j!                  |      g      S )Nc              3  T   K   | ]   }|t         j                  j                  v  " y wr|   )rP   r   rj  r  s     rr   rI  z$DeviceCopy.create.<locals>.<genexpr>  s     GqA***Gs   &(zDeviceCopy in input programrX  )r<  rd  r   r3   aot_inductoruse_runtime_constant_foldingr  rP   r   add_device_infor   rE   rN  r   r   r   r  )r  r   r   s      rr   r  zDeviceCopy.create  s     GA4D4D4FGG''DD''//	'	/78kkmZZ\
 q!"
 	
rs   c                    | j                         }t        |      dk(  sJ | j                  r.|j                  |d   | j                  j	                                y |j                  |d   | j	                                y r  )r  r   r  r  r  r  s      rr   r@  zDeviceCopy.codegen  si      "4yA~~''Q1A1A1S1S1UV''Q1G1G1IJrs   N)r   r%  r&  r  r  r@  rw   rs   rr   rN  rN    s    
 
*Krs   rN  c                  <     e Zd ZdZd Zd Z fdZddZd Z xZ	S )rf   z;
    The result of a call to aten._local_scalar_dense.
    c                     yrS  rw   r   s    rr   r  zDynamicScalar.get_reads  rT  rs   c                     yr;  rw   r   s    rr   r  zDynamicScalar.should_allocate  r=  rs   c                    |j                          t        | 	  d t        t	        j
                  d            | j                  |g             || _        || _        y r   )	r  rp  r  r  rb   r   r  symkeypath)r   rY  rZ  r[  rr  s       rr   r  zDynamicScalar.__init__  sH    z%,,u*=>@S@SUYTZ@[\rs   c                .    t        | j                  g      S r|   )r-   rY  r   s    rr   rO  z&DynamicScalar.get_unbacked_symbol_defs  s    488*%%rs   c                &    |j                  |        y r|   )codegen_dynamic_scalarr?  s     rr   r@  zDynamicScalar.codegen  s    &&t,rs   rW  )
r   r%  r&  r,  r  r  r  rO  r@  r  r  s   @rr   rf   rf     s!    &-rs   rf   c                  @     e Zd ZdZd Zd Z fdZd Zd Zd Z	 xZ
S )rg   z5
    The result of a call to aten._assert_scalar
    c                     yrS  rw   r   s    rr   r  zAssertScalar.get_reads  rT  rs   c                     yr;  rw   r   s    rr   r  zAssertScalar.should_allocate  r=  rs   c                |    t         |   d t        t        j                  d            g        || _        || _        y r   )rp  r  r  rb   r   scalarry  )r   rb  ry  rr  s      rr   r  zAssertScalar.__init__  s7    u||E*+	
 rs   c                     yr  rw   r   s    rr   r"  zAssertScalar.has_side_effects!  r  rs   c                ,    t        | j                        S r|   )r(   rb  r   s    rr   r"  z%AssertScalar.get_unbacked_symbol_uses$  s    $T[[11rs   c                ^   t         j                  j                  ry |j                  dt         j                  j                  j                  | j                  d       d       |j                  dt        | j                         d       |j                  | j                          d       y )Nzif not F)r  :z    raise RuntimeError(r   z = None)
rP   r   rJ  r9  r(  codegen_python_sizevarrb  r  ry  r  r?  s     rr   r@  zAssertScalar.codegen'  s    77 !''..EEdkk\aEbccde  7TXX7GqIJ  19:rs   )r   r%  r&  r,  r  r  r  r"  r"  r@  r  r  s   @rr   rg   rg     s&    	2;rs   rg   c                  "    e Zd ZU ded<   ded<   y)ExternKernelNoder   r   zexport_schema.Noderp   Nr   r%  r&  r'  rw   rs   rr   ri  ri  ;  s    
I
rs   ri  c                       e Zd Z	 ddd fdZd ZddZd Zed        Zd Z	d	 Z
d
 Zd Zd Zedd       Zed        Z fdZ xZS )FallbackKernelNr  c               &    |t         j                  j                  k(  r6t        |      dk(  r(t        |      dk(  rt         j                  j                  }t
           |t        |      t        |      |       g  _        d _	        | _
        t        |t        j                  j                  t        j                  j                  f      sJ d| dt!        |       d       | _        | _        |i n| _        t(        j*                  j-                   j.                         g  _        g  _        t         j"                  t        j                  j                        ry d j"                  j5                         v ry  j"                  j6                  }t        j8                  j:                  j=                   j"                        r- j2                  j?                  |d   jA                                y |jB                  rtE        |      stG        d	|       |jH                  }	 j%                   jJ                   jL                        \  }
} fd
}t        j8                  j:                  jO                  ||
|      D ]  \  }} |||        y )Nr2   r  Fz#Fails to create FallbackKernel for r  z not supported_c10d_functionalr   z'NYI: Can't generate FallbackKernel for c                    t         j                  t        j                        rt        |t        t
        f      sJ t         j                  t        j                        xr2 t         j                  j                         t        j                        }t         j                  t        j                        xr2 t         j                  j                         t        j                        }|s$t         j                  t        j                        rt        |t
        t        f      rJ |y  j                  y  fd}|r|D ]
  } ||        y t         j                  t        j                        s|sJ  ||       y )Nc                    j                   j                  | j                                j                  j                  r>j
                  j                  t        t        | j                               |              y y r|   )	alias_namesre  r  
alias_infois_writer   r  r  r   )r   infor   s    rr   	add_aliaszPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias  sZ      ''

5??++))00&z!,,.'A1dK ,rs   )
r]   rn   rb   ListTyper^   r_   OptionalTypegetElementType
TensorTypers  )ru  r  is_optional_tensoris_list_tensorrv  
tensor_argr   s   `     rr   handle_aliasing_and_mutationz=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutation  s-   $))U^^4!#e}555!+		5--" "KTYY5579I9IJ  (		5>>B z		((*E,<,<HN "Z		5;K;K%L &cE4=999{& "% *Jj)* "$))U-=-=>BTTT#rs   )(rB  r  rm  r   Scalarrp  r  r_   r  use_runtime_dispatchr  r]   rb   r)  r*  rR  rn   r  rf  r   rP   r   warn_fallbackr  rr  r  r   r+  _libraryr  mutates_and_returns_first_argre  r  
is_mutabler   r   r,  r  r  
zip_schema)r   rZ  r  rr  nontensor_argsrf  r   r  schemaschema_argsr~   r~  ru  r  rr  s   `             rr   r  zFallbackKernel.__init__V  s>    dhhoo%K A%N#q(
 XX__F+.!	 	 	
 ')$)!!2

%%

..
 	X 14<.W	X 
 ","Nb	d556 '))+d&&

(F(FG !1!1!6!6!88
 !!)) >>==d>N>NO&&{1~'>'>'@A%;F%C%9&B  &&**4;;8J8JKf 	D --88vN 	4ID#(s3	4rs   c                N    t         d      sy t        t        j                  j                  j
                   j                        }|sy |j                         D ]G  \  }fd fd}|j                  |j                  |       d |        |j                          I y )Nr  c                   |dk(  r| S t        |      dk\  r_t        |d   t              rLt        |d   t        j                        r/ |  d|d   j
                   d|d   j                   d|dd        S t        |d   t              r |  d|d   j
                   d|dd        S t        |d   t        j                        rYt        j                  j                  r  d	|d   j                   d
|  d|dd        S  |  d|d   j                   d|dd        S t        |d   t              r |  d|d   j                   d|dd        S t        d|       )Nrw   r   r   r2   rC  r   r   z()z	std::get<z>(r  r  z.__floordiv__(zunrecognized keypath )r   r]   r%   r]  SequenceKeyr   r   rP   r   rJ  r'   r  r  )rL  rZ  gos     rr   r  z7FallbackKernel.codegen_unbacked_symbol_defs.<locals>.go  s   b=K LA%"71:}="71:v/A/AB&'!*//!2!GAJNN3C1Ewqr{   
M:a
'8;WQR[II
F,>,>? 77.. Ywqz~~&6ba@'!"+N  4&'!*..)9 ;WQR[I
  
K8 nWQZ5G5G4HJGTUTVKXX(+@	)JKKrs   c                    t         j                  j                  rt        j                  rt        j                        dk(  r$  j                  d   j                               S t        d   t        j                        sJ   j                  d   j                     j                         dd        S   j                               S r  )rP   r   rJ  r3   rT  r   r  r  r]   r]  r  r   )r  rZ  r   s   rr   go_outerz=FallbackKernel.codegen_unbacked_symbol_defs.<locals>.go_outer  s    77&&6+@+@
 4<<(A-!$,,q/":":"<gFF)'!*f6H6HIII!$,,wqz~~">"G"G"I7STSU;WWdmmow77rs   z = )r4  r*   rP   r   r   r   r  rR  r9  codegen_unbacked_symbol_declr  )r   r:  r  r   r  r  rZ  s   `    @@rr   codegen_unbacked_symbol_defsz+FallbackKernel.codegen_unbacked_symbol_defs  s    t015GG&&(>(>
 !+113 -	JAwL68 77:;3xzl7>>JZ[W-	rs   c                    t        | dd       x}r<t        t        j                  j                  j
                  |      j                         S t               S )Nr  )r}   r*   rP   r   r   r   r  r-   )r   r  s     rr   rO  z'FallbackKernel.get_unbacked_symbol_defs  sL     '.A4 HHH,  **,=df <rs   c                ,   t         j                   G d d             }| j                  D cg c]  } ||j                                }}| j	                  || j
                        \  }}t        j                  j                  rt        | j                  t        j                  j                        r| j                  ||      }t        | j                  j                   j"                  |      D cg c]9  \  }}t        j                  j$                  j'                  ||j(                        ; }}}n6|D cg c]+  }t        j                  j$                  j'                  |      - }}| j*                  j-                  |       |S c c}w c c}}w c c}w )Nc                       e Zd ZU ded<   ddZy))FallbackKernel.codegen_args.<locals>.Shimr   refc                    | j                   S r|   )r  r   s    rr   r  z2FallbackKernel.codegen_args.<locals>.Shim.__repr__  s    xxrs   NrT  )r   r%  r&  r'  r  rw   rs   rr   Shimr    s    H rs   r  )r  	dataclassr  r  rf  r  rP   r   rJ  r]   r  rb   r)  r*  r5  r   r+  r,  r(  r  r.  r   r  )r   r  r   rr  r~   r   params          rr   r  zFallbackKernel.codegen_args  sH   				  	  
	  =AKKHqtA//12HH**;8J8JKf77:d.>.>

@U@U#V..tV<D !$D$4$4$<$<$F$F ME1 $$33AuGD 
 EIIqAGG((77:IDI 	6" I
 Js   F1>F70Fc                   | r3| D cg c]#  }|j                         s|j                         % }}|d   S t        |t        j                        r|j                  S t        |t
        t        f      r\t        d |D              }|D cg c]  }|s|	 }}t        |      dk(  r|d   S |D ]  }t        |j                        s|c S  |d   S y c c}w c c}w )Nr   c              3  H   K   | ]  }t         j                  d |        y wr|   )rl  find_devicer<  s     rr   rI  z-FallbackKernel.find_device.<locals>.<genexpr>7  s"      $89**43$ru  r2   )r   r]   rb   rm  r   r^   r_   r-   r   rH   rn   )rr  rw  r  devices
device_setr   s         rr   r  zFallbackKernel.find_device/  s    3>SC#..BRs~~'SGS1:nell3!(((ntUm4# $=K$ J -7A&&vAGA7|q qz!! "&++&!M" 1:! T Bs   CCCCc                    t        | j                  t        j                  j                        ryt        | j                        j                         S r;  )r]   r  rb   r)  rR  r$   r  r   s    rr   r"  zFallbackKernel.has_side_effectsD  s9    d&&

(F(FGt//0;;==rs   c                    | j                   S r|   )rr  r   s    rr   r  z+FallbackKernel.get_inputs_that_alias_outputI  ry  rs   c                N    t        | j                        dk  sJ | j                  S r  )r   r  r   s    rr   r  z!FallbackKernel.get_mutation_namesL  s'    4&&'1,,,"""rs   c           	     &   t        | t              sJ | j                  | j                  | j                        \  }}| j                  ||      }| j                  D cg c]  }|j                  |d        }}t        j                  j                  sg ||S t        d d       }|j                  | j                  ||      }d }| j                  }|j                  j                  }	t!        |	      dk(  r$|	d   j"                  }
 ||
| j$                        g}nxt        | j$                  t&              sJ t!        |	      t!        | j$                        k(  sJ t)        |	| j$                        D cg c]  \  }} ||j"                  |       }}}t+        | j-                         t/        j0                  | j                  j3                         ||i             }t        j                  j4                  j7                  |       g ||S c c}w c c}}w )Nc           	     p   t        | t        j                        ro|}t        |t        t        f      rt        |      dk(  sJ |d   }t        j                  j                  t        j                  |j                                     S t        | t        j                        rxt        | j                         t        j                        rPt        j                  j                  |D cg c]&  }t        j                  |j                               ( c}      S t        dt        |              c c}w )Nr2   r   rF  )	as_tensor)
as_tensorszUnsupported return type )r]   rb   rz  r^   r_   r   export_schemaArgumentr  TensorArgumentr  rw  ry  RuntimeErrorrn   )return_typeoutputr   s      rr   handle_single_outputzFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_outpute  s   +u'7'78ftUm4v;!+++ )C$--44+::O 5   K8Z**,e.>.>> %--44 $*  &44#,,.I  5   #%=d;>O=P#QRR s   )+D3r2   r   )r  r  r  metadata)r   rp   )r]   rl  rf  r  r  r5  r  r  rP   r   aot_moder   serialize_inputsr  r+  returnsr   r.  r  r_   r   ri  r  r  Noder   extern_kernel_nodesre  )r   r~   r   r6  ordered_kwargs
serializernamed_argumentsr  r  r  r  output_argumentsreturn_schemar  rp   s                  rr   export_extern_kernel_nodez(FallbackKernel.export_extern_kernel_nodeV  s   $///**4;;8J8JKf**48-1-O-O
&)FJJsD!
 
 ww+T+N++*46
$55d6F6FfU	S. !!..((w<1!!*..K 4[$,, OP dllE222w<3t||#4444 .1$,,-G )M6 %]%<%<fE   
  ##'',,.&(	
 	
##**40''''y
V s   H/Hc                $   | j                   }|j                  dk(  rt        |t        j                  j
                        sJ t        j                  j                  rddl	m
} t        j                  rt        |      |vrt        j                  d|       d| _        n|j                  dk(  rXt        |t        j                  j
                        sJ t        j                  j                  r9t        j                  s)d| _        n!t        j                  j                  rd| _        | j                   r| j#                  |       d }d }t        j                  r| j%                         }n"g | j'                         | j)                         }|j+                  | j-                         | j.                  | j0                  || j2                  | j4                  | j6                  | j                   || j8                  
       n| j#                  |       g | j'                         | j)                         }t        j                  j:                  j=                  | |       t        | j>                  t@              r| jC                  |       | jE                  |       y )NrB  r   )inductor_fallback_opszG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantized)#r  rK  r]   rb   r)  r*  rP   r   rJ  torchgen.aoti.fallback_opsr  r3   rT  r   r|  r|  r  r=  r  r  r  6generate_extern_kernel_alloc_and_find_schema_if_neededr  r  r  rI  rH  rG  r  r(  generate_fallback_kernelrZ  r
  r  r  )r   r:  r  r  exported_argsr~   s         rr   r@  zFallbackKernel.codegen  s   !!v%fejj&;&;<<<ww""L((S[@U-U KKa 15D--fejj&;&;<<<ww"",,04D- ww"",0)$$  ) MD$$ $ > > @E**,Et/B/B/DEJJ''$$""##--     )AT&&(A4+>+>+@ADGG  99$E$++v.))'2))'2rs   c           	         t        | j                  | j                  t        | j	                               t        | j                                     S r|   )r  r   r   rC   r   r   )r  s    rr   tensor_to_layoutzFallbackKernel.tensor_to_layout  s9    MMLL%fkkm4%fmmo6	
 	
rs   c                    t         j                  f}||vrt        j                  j                  n	t               }|5    j                  |g|i |\  }}}}	}
d d d         j                        }|  t        |      ||	
      n |sJ d         t        |      ||	
       fd |g       }t        |t        t        t        f      r	|_        |S |g_        |S # 1 sw Y   xY w)Nrm  z"Not sure where to find device infoc                N    t         t        t        f      r. t                fdt	        t                     D              S t         t              r: j                         D ci c]  \  }}| |t               |fgz           c}}S t         t        j                        rt        j                               S t         t              r S t         t        j                        r j                  j                  S  J dt                d       y c c}}w )Nc              3  T   K   | ]  } |   t              |fgz          ! y wr|   )rn   )rG  r   generate_outputrj  r  s     rr   rI  zAFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>  s5      $ $F1Iw4<:K9L/LM$r  zFallbackKernel output type z is not supported)r]   r^   r_   rn   r   r   r`   rR  rb   rm  MultiOutputr  rl   SymIntrp   rL  )r  rj  r6  r  r  r  packeds   ``  rr   r  z.FallbackKernel.create.<locals>.generate_output   s   &4-0#tF| $"3v;/$   FD) %+LLN S g$v,9L8M.MNN  FELL1"((0 
 FC(FELL1{{''' NQ0f>OPQ"%s   +#D!)rB  *_fused_moving_avg_obs_fq_helper_functionalrP   r   rl  r   rz  r  r  r  r]   r^   r_   r`   r  )r  r  r~   r   fake_incorrect_kernelscontextrw  rr  rs  rf  r  r   r  r  r  s   `            @@rr   r  zFallbackKernel.create  s'   "&"Q"Q!S!'/E!EAGG;= 	  	< #""6;D;F;!	< n=!6""3F ???6!&)"3F	6 "."5geT23$FN  &YFN	< 	<s   C..C7c                     t         |          S r|   )rp  r7  rq  s    rr   r7  zFallbackKernel.apply_constraint"  s    w'))rs   r|   rW  )r  torch.Tensor)r   r%  r&  r  r  rO  r  r(  r  r"  r  r  r  r@  r  r  r  r7  r  r  s   @rr   rl  rl  U  s     ~4 ~4@8t .  (>
 #@(D83t 
 
 D DL* *rs   rl  c                  4     e Zd ZdZd Zd Zdd fd
Z xZS )ComplexViewz9View a complex number as two dtyped numbers or vice versac                     yr;  rw   r   s    rr   r  zComplexView.should_allocate*  r=  rs   c                >    | j                   d   j                         gS r  r  r   s    rr   r  z(ComplexView.get_inputs_that_alias_output-  s    A'')**rs   Nrm  c               0    t         |   ||||||       y )Nrm  )rp  r  )r   rZ  r  rr  r  rf  r  rr  s          rr   r  zComplexView.__init__1  s)     	/ 	 	
rs   )r   r%  r&  r,  r  r  r  r  r  s   @rr   r  r  &  s    C+ 
 
rs   r  c                      e Zd ZU ded<   y)r  rZ  r   Nrj  rw   rs   rr   r  r  E  s    rs   r  c                  @     e Zd Zd Zd Zd fdZddZd Zd Z xZ	S )	r  c                   t        |      dkD  r|d   \  }}t        |t              r| j                  | d| d|dd        S t        |t              rWt
        j                  j                  j                  || j                         t        |            }| j                  ||dd        S t        |t              r| j                  | d| d|dd        S t        d|      |S )Nr   r  r  r2   z['z']znon supported index type: )r   
issubclassr^   codegen_list_tuple_accessr_   rP   r   r(  codegen_tuple_accessr  r   r`   r  )r   basenamerj  ityper   tuple_accesss         rr   r  z%MultiOutput.codegen_list_tuple_accessN  s    w<!qzHE1%&55
!A3a6H'RSRT+VVE5) ww33HHdmmos1v  55lGABKPPE4(55
"QCr6JGTUTVKXX$%A5IIOrs   c                    |j                  | j                         | j                  | j                  d   j                         | j                               y r  )codegen_multi_outputr  r  r  rj  r?  s     rr   r@  zMultiOutput.codegen`  s>    $$MMO**4;;q>+B+B+DdllS	
rs   c                    t         |   d ||gd       t        j                  j	                  |       | _        t        j                  j                  |        || _        y rS  )rp  r  rP   r   r  r   r  rj  )r   rZ  r  rj  rr  s       rr   r  zMultiOutput.__init__f  sG    vw3GG++D1		""4(rs   c                <    | j                   d   j                         S r  )r  r"  r   s    rr   r"  z$MultiOutput.get_unbacked_symbol_usesl  s    {{1~6688rs   c                     yr;  rw   r   s    rr   r  zMultiOutput.should_allocateo  r=  rs   c                    | j                   D cg c]>  }t        |t              r,t        |j	                               dkD  r|j                         @ c}S c c}w r  )r  r]   rl  r   r  r  )r   inps     rr   r  z(MultiOutput.get_inputs_that_alias_outputr  sN     {{
#~.C4467!; LLN
 	
 
s   AA)rj  zList[Tuple[Any, ...]]rW  )
r   r%  r&  r  r@  r  r"  r  r  r  r  s   @rr   r  r  J  s!    $
9
rs   r  c                  ~    e Zd ZU dZded<   d Zd ZddZddZd Z	dd
Z
ed        Zd Zd Zed        ZddZeZy	)r  zC
    TensorBox / StorageBox allow in-place mutation of Tensors
    rY   r[  c                    t        | j                  |      }t        |      r|S t        t	        | j                        j
                   d| d      )NrC  z not callable)r}   r[  callableAttributeErrorrn   r   )r   r   r   s      rr   __getattr__zMutableBox.__getattr__  sE    TYY%B<ITYY 8 894&NOOrs   c                6    | j                   j                         S r|   r  r   s    rr   r  zMutableBox.realize  r  rs   c                6    | j                   j                         S r|   r  r   s    rr   r"  z#MutableBox.get_unbacked_symbol_uses  r  rs   c                6    | j                   j                         S r|   r  r   s    rr   r   zMutableBox.get_read_names  r  rs   c                6    | j                   j                         S r|   )r[  r   r   s    rr   r   zMutableBox.get_defining_op  s    yy((**rs   Nc                8    | j                   j                  |      S r|   )r[  r  r  s     rr   r  zMutableBox.codegen_reference  s    yy**622rs   c                6    | j                   j                         S r|   r  r   s    rr   rZ  zMutableBox.layout  s    yy##%%rs   c                    | j                   S r|   r#  r   s    rr   r   zMutableBox.get_layout  rv  rs   c                6    | j                   j                         S r|   r5  r   s    rr   r   zMutableBox.get_size  r  rs   c                .    | j                   j                  S r|   r  r   s    rr   r   zMutableBox.dtype  r  rs   c                t   t        | j                  t              rQt        |       j                   dt        | j                        j                   d}d}| j                  j                  }n&t        |       j                   d}| j                  }d}|t        t        |            |g}dj                  |      S )Nr   z))r   
)r]   r[  r  rn   r   r   r   r   )r   line0endlr  r   s        rr   rm  zMutableBox.__str__  s    dii,Dz**+1T$))_-E-E,FaHEDIINNEDz**+1-EIIED 3u:

 yyrs   rW  r#  r|   rT  )r   r%  r&  r,  r'  r  r  r"  r   r   r  r+  rZ  r   r   r   rm  r  rw   rs   rr   r  r  {  sk     LP#4*+3 & &$   " Hrs   r  c                      e Zd Zed        Zy)rX   c                *    t        t        |             S r|   )rX   ru  )r[  s    rr   r  zTensorBox.create  s    D)**rs   N)r   r%  r&  r(  r  rw   rs   rr   rX   rX     s    + +rs   c                  <    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
y	)
ru  c                    t        | j                  t        t        f      r4| j                  j	                         t
        j                  j                  v S yr;  )r]   r[  r#  rz  r  rP   r   graph_inputsr   s    rr   r  zStorageBox.is_input_buffer  s:    dii+!?@99%%'177+?+???rs   c                    t        | j                  t              xr4 | j                  j                         t        j
                  j                  v S r|   )r]   r[  r  r  rP   r   rj  r   s    rr   r  zStorageBox.is_module_buffer  s9    tyy>3 :		""$(9(99	
rs   c           	        t        | j                  t        t        t        t
        t        f      r| j                  j                         S t        | j                  t        t        t        t        f      sJ t        | j                               | j                  j                         }| j                  j                         }t        d t        | j                  j!                         | j                  j#                         | j                  j%                               | j                        | _        t&        j(                  j+                  | j                        | j                  _        t&        j(                  j/                  | j                         | j0                  | j                  _        || j                  _        || j                  _        | j                  j,                  S )NrX  rY  )r]   r[  r`  r  r#  rz  r  r  r  r  r  rH  rn   r5  r   r   r   r   r   rP   r   r  r   r  r   r3  r   )r   r3  r   s      rr   r  zStorageBox.realize  sO   II	
 99%%''$))iD$%GH 	
$IIK
 	
H ii//1II++-	"!yy++-ii))+YY'')
 
	 00;			""499- LL		 +		'		yy~~rs   c                    t        | j                  t        t        f      r9| j                  j	                         j
                  dkD  r| j                          yyy)zL
        Called on buffers we expect to be forced to realize later.
        r2   N)r]   r[  r  r  r  nontrivial_read_countr  r   s    rr   r!  zStorageBox.realize_hint  sF    
 tyy9i"89		**,BBQFLLN G :rs   c                    t        | j                  t              xr3 | j                         t        j
                  kD  xs | j                         S r|   )r]   r[  r  r  r3   realize_acc_reads_thresholdr  r   s    rr   r  z!StorageBox.has_exceeded_max_reads  s@    $))Y/ 
NNvAAA )&&(	
rs   c                F   |dkD  rt        | j                  t        t        f      r{t	        | j                        r3| j                  j                         ddg}t        fd|D              ry| j                         t        j                  kD  xs | j                         S y)zj
        A heuristic to decide if we should realize a tensor
        that is used multiple times.
        r2   expsigmoidc              3  :   K   | ]  }|j                   v   y wr|   )used_ops)rG  r   opcounts     rr   rI  z5StorageBox.should_realize_on_reuse.<locals>.<genexpr>  s     @qG,,,@s   TF)r]   r[  r  r  r   r  r  r  r3   realize_reads_thresholdr  )r   r  	heavy_opsr  s      @rr   should_realize_on_reusez"StorageBox.should_realize_on_reuse  s    
 19DII	9/EFdii ))446"I.	@i@@ 6#A#AA -**, rs   c                H    | j                  |      r| j                          y y r|   )r  r  r  s     rr   r   zStorageBox.mark_reuse  s    ''.LLN /rs   c                6    | j                   j                         S r|   r;  r   s    rr   r  zStorageBox.num_reads  r  rs   N)r   r%  r&  r  r  r  r!  r  r  r   r  rw   rs   rr   ru  ru    s+    

B
$%rs   ru  c                  0    e Zd ZU ded<   ded<   dZded<   y)Subgraphr   r   ztorch.fx.GraphModulegraph_moduleNzOptional[GraphLowering]r   )r   r%  r&  r'  r   rw   rs   rr   r  r    s    
I&&%)E")rs   r  c                    | D cg c]$  }t        |t              r|j                         n|& } }t        t	        d | D                    t        |       k  S c c}w )Nc              3  2   K   | ]  }t        |        y wr|   )id)rG  rl  s     rr   rI  z'_has_aliased_buffers.<locals>.<genexpr>$  s     ;"V*;rd  )r]   rz  r  r   r-   )buffersrl  s     rr   _has_aliased_buffersr    s^      !+6? COG 
 z;7;;<s7|KKs   )Ac                       e Zd ZU dZded<   dZded<   dZded<   dZded<   dZd	ed
<   	 	 	 	 	 	 	 	 	 	 d fdZ	e
	 	 	 	 	 	 	 	 dd       Zd Z xZS )ConditionalNr  	predicateOptional[List[TensorBox]]operandsOptional[Subgraph]true_subgraphfalse_subgraphOptional[List[MultiOutput]]r  c                P   || _         || _        || _        || _        g }t	        |t
              s|j                  |       |j                  |       t        | %  d ||       t        j                  j                  |       | _        t        j                  j                  |        y Nr   )r  r  r  r  r]   r0  re  rk  rp  r  rP   r   r  r   r  )r   r  r  r  r  rZ  r  rr  s          rr   r  zConditional.__init__/  s     # *,)%:;MM)$h 	 	
 GG++D1		""4(rs   c                :   | j                  |      }|D cg c]  }| j                  |       }}t        j                  j                  j                  d   }|D cg c]  }|j
                  d    }}||fD ]  }|j                  t        j                  j                  |j                  ||j                        |_        t        j                  |j                        5   |j                  j                  |  d d d         |j                  j                  }	|j                  j                  }
d|	fd|
ffD ]!  \  }}t        |	      st        d| d|        t        |	      t        |
      k(  s	J |	|
f       t        t!        |	|
            D ]  \  }\  }}|j#                         |j#                         k(  s
J |||f       |j%                         |j%                         k(  s
J |||f       |j'                         |j'                         k(  s
J |||f       |j)                         |j)                         k(  s
J |||f       |j+                         j,                  |j+                         j,                  k(  rJ |||f        t/        |t0              s|j'                         }n(t        |      dkD  sJ d	       |d   j'                         }t3        ||||t5        |      
      }t        |	      D cg c]w  \  }}t7        t9        |j'                         |j)                         |j#                         |j%                         |j+                         j,                        |t:        |fg      y }}}||_        |S c c}w c c}w # 1 sw Y   xY wc c}}w )NrV  r  gmexample_inputssubgraph_nametrue_fnfalse_fnzVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: r   zQWhen predicate is not a Tensor, there must be at least one operand in torch.cond.)r  r  r  r  rZ  r  )r  rP   r   r  r~   r  make_subgraphr  r   set_graph_handlerrungraph_outputsr  r  r   r   r   r   r  r   r   r   r  r]   r0  r  r  r  r  r^   r  )r  r  r#  r$  r  r   fx_operandsfake_operandssubgraphtrue_outputsfalse_outputsr   r  r   tofor   conditionalr  s                      rr   r  zConditional.createJ  s    %%i0	2:;QC%%a(;;gg**//30;<1<< (+ 		7H~~%!"!6!6,,#0"*-- "7 "
 ((8 7&HNN&&67 7		7 }}22 44(,7*m9TU 	MD'#L1$**./TU\T]_ 	 < C$66U}8UU6$S}%EF 	QKAxB;;=BKKM1>Ar2;>1==?bmmo5B2r{B5==?bmmo5B2r{B5<<>R\\^3@aR[@3==?))R]]_-C-CCPaR[PC	Q )%:;))+F H!cbc!a[++-F!!#$V,
, '|4
 6 !,,. **,*!,,.!,,.55 

 
" &O < =7 7P
s   N N!N
8A<N
N	c                &    |j                  |        y r|   )codegen_conditionalr?  s     rr   r@  zConditional.codegen  s    ##D)rs   )
r  rY   r  List[TensorBox]r  r  r  r  rZ  r  )r  rX   r#  r  r$  r  r  r3  )r   r%  r&  r  r'  r  r  r  r  r  r  r  r@  r  r  s   @rr   r  r  '  s    "&I&*.H'.(,M%,)-N&-+/G(/)) ")  	)
 !) ")6 OO O 	O
 "O Ob*rs   r  c                       e Zd ZU dZded<   dZded<   dZded<   dZded<   dZded	<   	 	 	 	 	 	 	 	 	 	 d fd
Z	e
	 	 	 	 	 	 	 	 dd       Zd Z xZS )	WhileLoopNr  carried_inputsadditional_inputsr  cond_subgraphbody_subgraphr  r  c                    || _         || _        || _        || _        t        |   d |||z          t        j                  j                  |       | _	        t        j                  j                  |        y r  )r6  r7  r8  r9  rp  r  rP   r   r  r   r  )r   r6  r7  r8  r9  rZ  rr  s         rr   r  zWhileLoop.__init__  sp     -!2**!$55 	 	
 GG++D1		""4(rs   c                	   |D cg c]  }| j                  |       }}|D cg c]  }| j                  |       }}||z   }t        j                  j                  j                  d   t        j                  j                  j                  d   z   }|D cg c]  }|j
                  d    }}||fD ]  }	|	j                  t        j                  j                  |	j                  ||	j                        |	_        t        j                  |	j                        5   |	j                  j                  |  d d d         |j                  j                  }
|j                  j                  }t        |      rt        d|       t        |
      dk(  sJ |
       |
d   j                         t         j"                  k(  sJ |
       t        |
d   j%                               dk(  sJ |
       t        |      dkD  sJ d       |d   j'                         }t        |      t        |      k(  s	J ||f       t)        t+        ||            D ]  \  }\  }}|j%                         |j%                         k(  s
J |||f       |j-                         |j-                         k(  s
J |||f       |j'                         |j'                         cxk(  r|k(  sn J ||||f       |j                         |j                         k(  s
J |||f       |j/                         j0                  |j/                         j0                  k(  rJ |||f        t3        ||||t5        |      	      }t)        |      D cg c]w  \  }}t7        t9        |j'                         |j                         |j%                         |j-                         |j/                         j0                  
      |t:        |fg      y }}}t+        ||      D ]g  \  }}|j=                         t        j                  j>                  v s1t        j                  j@                  jC                  |j=                                i ||_"        |S c c}w c c}w c c}w # 1 sw Y   xY wc c}}w )NrV  r  r  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: r2   r   z9torch.while_loop is assumed to have at least one operand.)r6  r7  r8  r9  rZ  r  )#r  rP   r   r  r~   r  r%  r  r   r&  r'  r(  r  r  r   r   rb   r   r   r   r   r   r  r   r  r5  r  r  r  r^   r  r  r&  r  r  )r  cond_fnbody_fnr6  r7  r   
all_inputsfx_all_inputsfake_all_inputsr+  cond_outputsbody_outputsr   r   opbo
while_loopr  r  r  r   s                        rr   r  zWhileLoop.create  s@    9GG1#++A.GG;LMaS..q1MM#&77
,,11"58L8L8Q8QRT8UU2?@Q166%=@@ '* 		9H~~%!"!6!6,,#0"*-- "7 "
 ((8 9&HNN&&89 9		9 }}22}}22- XXdWeg  < A%3|3%A((*ejj8F,F8<?++-.!3A\A3 
Oa	GF	G A))+ >"c,&77W.,9WW7$S%FG 	QKAxB;;=BKKM1>Ar2;>1==?bmmo5B2r{B5 ==?bmmo??T!RVATT?<<>R\\^3@aR[@3==?))R]]_-C-CCPaR[PC	Q )/!!$V,

* '|4
 6 !,,. **,*!,,.!,,.55 

 
 NG4 	@HC||~!5!55 ++//?	@ %
g HM A9 9V
s#   Q+Q0Q5Q:-A<R:R	c                &    |j                  |        y r|   )codegen_while_loopr?  s     rr   r@  zWhileLoop.codegen  s    ""4(rs   )
r6  r3  r7  r3  r8  r  r9  r  rZ  r  )r=  r  r>  r  r6  r3  r7  r3  )r   r%  r&  r6  r'  r7  r8  r9  r  r  r  r  r@  r  r  s   @rr   r5  r5    s    04N-43707(,M%,(,M%,+/G(/)') +)  	)
  ) "), ZZ Z (	Z
 +Z Zx)rs   r5  c                  8     e Zd Z	 ddd fdZ fdZd Z xZS )rm   Nrm  c          	     
   t         
|   |||||d |       ddlm}  ||g |||      }	|	J |	| _        t
        j                  j                  j                  |	d       | _	        | t
        j                  j                  |	<   y )N)r   r  r   )get_effect_key)
rp  r  torch._higher_order_ops.effectsrK  effect_typerP   r   effectful_opsr  prev_effect_buffer)r   rZ  r  rr  r  rf  r   r  rK  rM  rr  s             rr   r  zEffectfulKernel.__init__   s     	/ 	 	
 	C$V-L~-L-LfU&&&&"#''"7"7";";K"N-1k*rs   c                    t         |          }| j                  F|j                  j	                  t        j                  | j                  j                                      |S r|   )rp  rA  rO  rK  r  r4   rE  r  )r   ri  rr  s     rr   rA  zEffectfulKernel.get_read_writes=  sU    g-/"".!!$$T%<%<%E%E%GH rs   c                     yr  rw   r   s    rr   r"  z EffectfulKernel.has_side_effectsG  r  rs   r|   )r   r%  r&  r  rA  r"  r  r  s   @rr   rm   rm     s!     2 2:rs   rm   c                  6    e Zd ZU ded<   ded<   d Zd Zd	dZy)
r  r   r   ztorch._C.ScriptObjectr   c                    | j                   S r|   rF  r   s    rr   r  zTorchBindObject.get_nameP  r(  rs   c                     y r|   rw   r   s    rr   r   zTorchBindObject.get_deviceS  r  rs   Nc                    | j                   S r|   rF  r  s     rr   r  z!TorchBindObject.codegen_referenceV  r(  rs   r|   )r   r%  r&  r'  r  r   r  rw   rs   rr   r  r  K  s    
I  rs   r  c                  R    e Zd Zd Zd ZdddZe	 	 	 	 d	d       Ze	 	 d
d       Zy)_CollectiveKernelc                     yr;  rw   r   s    rr   r  z!_CollectiveKernel.should_allocate[  r=  rs   c                     yr  rw   r   s    rr   r"  z"_CollectiveKernel.has_side_effects^  r  rs   Nc                   ddl m} t        | j                        t        j
                  j                  u sJ d       | j                  }|j                  j                  | _	        |j                  j                  | _        | j                  j                  dd       d| j                   | _         ||      | _        |j                  j                  D cg c]  }|j                   s|j                   c}| _        y c c}w )Nr2   rD  z,Setting cpp kernel needs a valid op_overloadrF  r   )rN  rE  rn   r  rb   r)  r*  r+  r   r  rO  rG  rM  rH  rI  r,  r-  r  )r   r  rE  r  r   s        rr   r"  z%_CollectiveKernel.set_cpp_kernel_namec  s    6 !!"ejj&;&;;	:9	:;!!%~~22(.(D(D%!%!5!5!=!=dC!H I4KhKhJij.v6"NN44.
AFF.
* .
s   C=&C=c           
     $   t         j                  j                  5   | j                  ||g|i |\  }}}}}	d d d        	rJ | d|	        D ]  }
|
j	                           |d   j                         } | t        |      ||      }t        j                  |      }|j                  j                  |D cg c]  }t        t        |      ||       c}       |j                  j                  |D cg c]  }|j                          c}       d|v r_|j                  j                  t        t        |      |d   |             |j                  j                  |d   j                                y y # 1 sw Y   DxY wc c}w c c}w )Nr  r   r   )rP   r   rl  rz  r  r   r  r]  tree_leavesr   rk  r  rr  r  re  )r  r  r  r~   r   rw  rr  rs  rf  r  r}  r   r  inpsrl  r  s                   rr   create_inplacez _CollectiveKernel.create_inplacey  s    WW 	D #""66CDCFC!	D %E2C1D&EE$% 	!J 	! Q**,v
 !!&)&&HLM^Jv.V<M	

 	!!T"Bc3<<>"BCF?##**z&16%=&I %%fUm&<&<&>? 9	D 	D. N #Cs   E;<F9F;Fc           
     H   t         j                  j                  5   | j                  ||g|i |\  }}}}}	d d d        	rJ | d|	        D ]  }
|
j	                           t        t              rw| j                  ||      } | t        |      ||      }t        |      D cg c](  \  }}t        | j                  |      |t        |fg      * c}}|_        |j                  S  | | j                  |      ||      }|g|_        |S # 1 sw Y   xY wc c}}w )Nr  )rP   r   rl  rz  r  r]   r^   r  r  r   r  r  r  )r  r  r  r~   r   rw  rr  rs  rf  r  r}  r   r  r   r  s                  rr   create_out_of_placez%_CollectiveKernel.create_out_of_place  sY    WW 	D #""66CDCFC!	D %F3D2E&FF$% 	!J 	! nd+__[.AF!&)F "+>!: Av ((0AYKFN >>!$$^4F %XFNMO	D 	D*s   D+-DDr|   r  )r  !Union[TensorBox, List[TensorBox]]ru   rv   )r  ra  )	r   r%  r&  r  r"  r"  r  r^  r`  rw   rs   rr   rW  rW  Z  sV    

, $@>$@	$@ $@x *>* *rs   rW  c                  6     e Zd Zd Zedd       Z fdZ xZS )_WaitKernelc                
   | j                   d   }t        |t              r|j                   d   gS t        |t              rC|j                   d   }t        |t              r"|j                  d   \  }}|j                   |   gS g S g S r  )r  r]   rW  r  rj  )r   r  collr   r   s        rr   get_volatile_readsz_WaitKernel.get_volatile_reads  s}    kk!nc,-JJqM?"[) ::a=D$ 12Q3C())I Irs   c                n   t         j                  j                  5  | j                  ||      \  }}}}}d d d        rJ | d|         | t	        |j                               |      }|j                  j                  t        t	        |j                               ||             y # 1 sw Y   xxY w)Nr  )	rP   r   rl  rz  r  r   r   re  r  )	r  r  r  rw  rr  rs  rf  r  r  s	            rr   create_waitz_WaitKernel.create_wait  s    WW 	0 ""63/!	0 %E2C1D&EE$s~~'(
 	&&:cnn&67fE	
!	0 	0s   B++B4c                    t         |          }| j                         }|D ]>  }|j                  j	                  t        j                  |j                                      @ |S r|   )rp  rA  rf  rK  r  r4   rE  r  )r   ri  volatile_readsvrrr  s       rr   rA  z_WaitKernel.get_read_writes  sZ    g-/002  	GB!!,"6"6r{{}"EF	Grs   )r  rX   ru   rv   )r   r%  r&  rf  r  rh  rA  r  r  s   @rr   rc  rc    s&    * 
 
* rs   rc  c                   t        | t        t        f      rt        |       S t        | t        t
        f      r!t               }| D ]  }|t        |      z  } |S t        | t        j                        rt        |       S t               S r|   )
r]   r+   r   r(   r_   r^   r-   r  rb   rm  )r   r  r   s      rr   r  r    sv    !h%&$Q''	At}	%&0l 	0A,Q//A	0	Au||	$$Q''|rs   )rx   rt   ru   rv   )r   r   ru   zCallable[..., OpsValue])r   Sequence[int]ru   z&Callable[[Sequence[_T]], Sequence[_T]])r   z&Callable[[Sequence[_U]], Sequence[_V]]r   z&Callable[[Sequence[_T]], Sequence[_U]]ru   z&Callable[[Sequence[_T]], Sequence[_V]])r   Sequence[Union[int, Integer]]ru   rn  )r   z(Sequence[Union[int, torch.SymInt, Expr]]ru   rm  r$  )r   zLiteral[None]r   r   ru   rv   )r   rY   r   r   ru   r  )r   r  r   r   ru   zOptional[torch.Tensor])r   zOptional[Sequence[_T]]ru   z Optional[Sequence[Optional[_T]]])r   r   ru   r  )r   r   ru   r   )r   zUnion[Expr, Sequence[Expr]]r   r  ru   rO   )r  r   r   r  r  r   ru   zCallable[..., object])r  Sequence[_IntLike]r  ro  r   ro  ru   r   )r   rY   ru   r   )TFNFN)r   rY   re  r   rp  r   rq  'Optional[Sequence[Union[int, Integer]]]rr  r   rs  rp  ru   zTuple[StorageBox, Layout])r   rY   rq  rn  ru   r   )r   ro  r  ro  ru   r   )r   r  ru   rl   )r  zSequence[IRNode]ru   r   )r   r   ru   zOrderedSet[Symbol](  
__future__r   r)  r  r  r  loggingtextwrapr   r   r   typingr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   unittest.mockr   rh   r   r   r   torch._export.serde.schema_exportserder  r  torch._loggingrb   torch.fxtorch.utils._pytreer  _pytreer]  torch._dynamo.device_interfacer   torch._dynamo.utilsr   torch._export.serde.serializer   *torch._higher_order_ops.auto_functionalizer   torch._inductorr   torch._prims_commonr   r    r!   r"   r#   torch._subclasses.fake_tensorr$   %torch.fx.experimental.symbolic_shapesr%   r&   r'   r(   r)   r*   r+   torch.utils._ordered_setr-   torch.utils._sympy.functionsr.   r/   r0   torch.utils._sympy.symbolr1   r   r3   r4   codegen.commonr5   r6   r7   r8   r9   r:   	loop_bodyr;   ops_handlerr<   r=   runtime.benchmarkingr>   runtime.hintsr?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   virtualizedrN   rO   rP   r   rQ   rR   rS   rT   rl   rU   r'  	getLoggerr   r|  r   rB  r   rZ   ry   r   r   r   r   r  r  r   r   r   r   r   r   r   rY   r  r-  rY  r  r  r  r  r  r  r  r  r  r?  rH  r   rm  rf  r  r}  r  re   r  r  r  r  rz  r.  r9  rQ  r\  rc  rl  rp  r
  r  r   r  r  r  rv  r  r#  r  r+  r0  r`  r  r  r  r   PrimitiveInfoTyper  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r$  r)  r1  rA  rN  rf   rg   ri  _embedding_bagr  _fft_c2c'_scaled_dot_product_efficient_attention#_scaled_dot_product_flash_attention#_scaled_dot_product_cudnn_attention
_scaled_mmaddmmr   bmmcopy_mmrepeat_interleaverm  nonzeror  r   view_as_real
has_c_shimrl  r  r  r  r  rX   ru  r  r  r  r5  rm   r  rW  rc  r  rw   rs   rr   <module>r     sj   "        "     " (   ' ' 2 2   $ $ C ( ? M #  :   0 L L * " D    4 - (     * ) $T]T]T]CI&) &g!			8??4	8yy~~'T  k	sDk!12K8STU	i 	$>44 , ! $ (" 
 
 
 

 .2&*8!%)
'aE aEH A A AH x
F x
 x
v&B B2 
i 
 
8 |$y!y!u=)< 8  JN<N<N +<NBF<N<N~   ,> FX 	   R
 R
 R
j|
y |
~ v
5 v
 v
t 	 	 	 I5 I IX	 !<@=A/// / :	/
 / ;/ /d $59#4#44$  
:	 WLv WL WLt C C CL (( ( (V79( 79t (  : P; P Pf Ph P Pf % % %PH< H<V6 4 8| 8 8$ @| @ @'9	< @TV @T @TF!& !HI7V I7XPf P2 *M* M*` uV u up 
&fi 
& 
&& 

[ 
&-6 -XF X  4_ 4 4D
5
_ 5
p*> *Z #udCeCeT<Q6R1SST $$ $$N"| "
.=. .=bM M$  8? 8 8v 
L9 L^ `< ` `F 4l 4 4n
/ 
,&" &"RV .bl bJ)<| )<Z/, /d< 6B5 B"F- F,H)l H)V.)| .)bK K@-L -0-;< -;`   
 ##44<<00880088



%%		!!
(N*& N*b 
. 
 
<   .
, .
b ; ; ;|+
 +T% T%n *v * *L u*, u* u*p {) {) {)|)n )X f  G GT2# 2prs   