
    wg                       d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ ddlmZ d dlmZ ed ed	        Z ed
      Z  G d dejB                        Z"dEdZ# G d d      Z$d Z%i Z&dFdZ' G d dee          Z(d Z)d Z*i dddddddddddd d!d d"dd#d$d%d$d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9Z+ e,e+j[                               D ]  Z.e.e+e.<   	  G d: d;e(e          Z/edGd<       Z0eddddddd=	 	 	 	 	 	 	 	 	 	 	 dHd>       Z0	 dIddddddd=	 	 	 	 	 	 	 	 	 	 	 	 	 dJd?Z0 G d@ dA      Z1 G dB dC      Z2dD Z3y)K    )annotationsdivisionN)defaultdict)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTuple   )driver)
ModuleTypez.runtime.jitTc                  p     e Zd ZdZd fdZed        Zd Zd Zd Z	d Z
d Zd	 Zd
 Zd Zd Zd Z xZS )DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    c                    t         |           || _        t        j                  |j                  d            | _        || _        h d| _        i | _	        d| _
        y )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstanceF)super__init__namehashlibsha256encodehasherglobalssupported_python_builtinsused_global_valsvisiting_arg_default_value)selfr%   r*   src	__class__s       W/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/triton/runtime/jit.pyr$   zDependenciesFinder.__init__$   sU    	nnSZZ%89 *
&. TV*/'    c                6    | j                   j                         S N)r)   	hexdigestr.   s    r1   retzDependenciesFinder.retH   s    {{$$&&r2   c                   t        |j                        t        j                  k(  r|j                  S |j                  | j
                  v ry | j                  j                  |j                  d       }|| j                  st        |      t        k7  rot        |t              s_t        |dd      sR|j                  | j                  vr:|| j                  f| j                  |j                  t	        | j                        f<   |S )N__triton_builtin__F)typectxastStoreidlocal_namesr*   getr-   r   r"   JITFunctionr!   r+   r,   )r.   nodevals      r1   
visit_NamezDependenciesFinder.visit_NameL   s    >SYY&77N77d&&&lltww-
 O 77 I+ #34WSJ^`e=fGG4#A#AABEt||ATD!!477Bt||,<"=>
r2   c                ^    |j                   D cg c]  }| j                  |       c}S c c}w r4   )eltsvisit)r.   rB   elts      r1   visit_TuplezDependenciesFinder.visit_Tuplej   s$     ,0995C

3555s   *c                2   | j                  |j                        }t        |t        j                        r6| j                  |j                        }t        |t        j                        r6|t        |dd      t        k(  ry t        ||j                        S )N__name__ )rG   valuer"   r<   	Attributer!   TRITON_MODULEattr)r.   rB   lhss      r1   visit_Attributez"DependenciesFinder.visit_Attributeo   sm    jj$cmm,**SYY'C cmm,;73
B7=HsDII&&r2   c                    fd} j                  j                        }|- ||      s%t        |t              sJ d|j                   d       t        j                  |ft         j                   j                         fdj                  D              D ]*  }t        |t              s ||      r|j                  } j                  j                         |j                  j                         z  D ]_  }|\  }} j                  |   \  }	}|j                  |   \  }
}|	|
k7  s2t        d| d|	 d j                   d|j                   d	|
 d
        j                  j                  |j                         t!        t#        |dd            }||z   } j$                  j                  |j'                  d             - y )Nc                    t        j                  j                        ryt        | dd      }|j	                  t
              S )NT
__module__rL   )inspect	isbuiltinfuncr!   
startswithrO   )rX   modulerB   s     r1   is_triton_builtinz8DependenciesFinder.visit_Call.<locals>.is_triton_builtiny   s7      +T<4F$$]33r2   z
Function "zv" is being called from a Triton function but is not a Triton function itself. Decorate it with @triton.jit to fix thisc              3  T   K   | ]  }j                  |j                         ! y wr4   )rG   rM   ).0kwr.   s     r1   	<genexpr>z0DependenciesFinder.visit_Call.<locals>.<genexpr>   s     :bTZZ!:s   %(Global variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr   )rG   rX   r"   rA   rK   	itertoolschainmapargskeywords	cache_keyr,   keysRuntimeErrorr%   updatestrr!   r)   r(   )r.   rB   r[   rX   objfunc_cache_keykvar_name_v1v2ra   keys   ``           r1   
visit_CallzDependenciesFinder.visit_Callw   s   	4 zz$))$|06*+;
 	^  ']  ^	^ 
 ??HDJJ		*:DMM:
 	4C
 c;/ % ]]N **//1C4H4H4M4M4OO !--a0A,,Q/A8&*8*KtCSTXT]T]S^^qrvrr  rA  AX  Y[  X\  \S  T  !!(()=)=>73
E:;H 8+CKKszz'239	4r2   c                    |j                   j                   D ch c]  }|j                   c}| _        | j                  |       y c c}w r4   )re   argr?   generic_visit)r.   rB   rv   s      r1   visit_FunctionDefz$DependenciesFinder.visit_FunctionDef   s4    /3yy~~>CGG>4  ?s   Ac                p     fd}t        j                  |j                  |j                  |j                  r|j                  gng |j
                        D ]  } j                  |         ||j                         |j                   j                  |j                          ||j                         y )Nc                    	 j                   rJ d_         | D ]  }|j                  |        	 d_         y # d_         w xY w)NTF)r-   rG   )defaultsexprr.   s     r1   visit_defaultsz:DependenciesFinder.visit_arguments.<locals>.visit_defaults   sS    8::::26/$ )D'

4() 38/%/s   < < 	A)
rb   rc   posonlyargsre   vararg
kwonlyargsrG   kw_defaultskwargr{   )r.   rB   r}   rv   s   `   r1   visit_argumentsz"DependenciesFinder.visit_arguments   s    	8 ??4#3#3TYYQUQ\Q\bdfjfufuv 	CJJsO	 	t''(::!JJtzz"t}}%r2   c                    | j                  |      }t        |t              r| xj                  t	        |      z  c_        y | j                  j                  |       y r4   )rG   r"   r   r?   setadd)r.   rB   targets      r1   visitAssnTargetz"DependenciesFinder.visitAssnTarget   sE     D!fd#F+  (r2   c                    t        |j                        dk7  rt        d      | j                  |j                  d          | j	                  |       y )N   z2Simultaneous multiple assignment is not supported.r   )r   targets	TypeErrorr   rw   r.   rB   s     r1   visit_AssignzDependenciesFinder.visit_Assign   sG    t||!
 PQQT\\!_- 	4 r2   c                \    | j                  |j                         | j                  |       y r4   r   r   rw   r   s     r1   visit_AnnAssignz"DependenciesFinder.visit_AnnAssign   $    T[[) 	4 r2   c                \    | j                  |j                         | j                  |       y r4   r   r   s     r1   	visit_ForzDependenciesFinder.visit_For   r   r2   )returnNone)rK   rU   __qualname____doc__r$   propertyr7   rD   rI   rR   rt   rx   r   r   r   r   r   __classcell__r0   s   @r1   r   r      sV    	"0H ' '<6
'+4Z!
&@)!!!r2   r   c                t    t        | t              r| j                  S t        | t              r| S t	        |       S r4   )r"   r:   rK   rk   repr)tys    r1   _normalize_tyr      s.    "d{{	B		8Or2   c                      e Zd ZdZddZed        Zed        Zed        Zed        Z	ed        Z
ed        Zed	        Zy
)KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.c                .    || _         || _        || _        y r4   )num_paramdo_not_specialize)r.   r   paramr   s       r1   r$   zKernelParam.__init__   s    !2r2   c                .    | j                   j                  S r4   )r   r%   r6   s    r1   r%   zKernelParam.name  s    {{r2   c                    | j                   j                  r1| j                   j                  t        j                  j                  k(  ryt        | j                   j                        S )NrL   )r   
annotationrV   	Parameteremptyr   r6   s    r1   r   zKernelParam.annotation  sD    {{%%)?)?7CTCTCZCZ)ZT[[3344r2   c                    | j                   }dD ]4  \  }}||j                  |      t        |      z   d  }|s)||v s.| | c S  |dk(  ryy)N))uintu)r   iboolu1rL   )r   findr   )r.   r   ty1ty2widths        r1   annotation_typezKernelParam.annotation_type  se    __
5 	'HCzs3c#h>?@E
*ug&	' r2   c                    d| j                   v S )N	constexpr)r   r6   s    r1   is_constexprzKernelParam.is_constexpr  s    doo--r2   c                <    d| j                   v xr | j                   S )Nconst)r   r   r6   s    r1   is_constzKernelParam.is_const  s    $//)C$2C2C.CCr2   c                .    | j                   j                  S r4   )r   defaultr6   s    r1   r   zKernelParam.default  s    {{"""r2   c                d    | j                   j                  t        j                  j                  k7  S r4   )r   r   rV   r   r   r6   s    r1   has_defaultzKernelParam.has_default"  s#    {{""g&7&7&=&===r2   N)r   r   r   zinspect.Parameterr   r   )rK   rU   r   r   r$   r   r%   r   r   r   r   r   r   r    r2   r1   r   r      s    L3
     5 5
   . . D D # # > >r2   r   c                    t        | d      r| j                         dz  dk(  ryt        | t              r| dz  dk(  ry| dk(  ryy)Ndata_ptr   r   Dr   1N)hasattrr   r"   r   )vs    r1   compute_spec_keyr   '  sF    q*1::<"#4#9	As	FaK!Vr2   c                ^   | yt        | t              ryt        | t              rd| k  r| dk  ryd| k  r| dk  ryy	t        | t              ry
| j                  |f}t
        j                  |d       }|:|d   rdndt        t        |d         j                  d      d      z   }|t
        |<   |S )Nnonei1   i32                u64i64fp32r   *k*r   .)
r"   r   r   r   dtype	dtype2strr@   type_canonicalisation_dictrk   split)rv   r   dskress       r1   mangle_typer   7  s    
{	C		C	s?si/c\cY.	C	 yy(#mmC&;q64s.HSQRVIZIZ[^I_`bIc.ddC IcN
r2   c                       e Zd ZU ded<   ddZy)KernelInterfacer   runc                      fdS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                 .     j                   | dd|S )NFgridwarmup)r   )re   kwargsr   r.   s     r1   <lambda>z-KernelInterface.__getitem__.<locals>.<lambda>Y  s    xtxx$T%'YRX'Y r2   r   )r.   r   s   ``r1   __getitem__zKernelInterface.__getitem__S  s     ZYr2   N)r   r   )rK   rU   r   __annotations__r   r   r2   r1   r   r   P  s    	
FZr2   r   c                   |j                         D ci c],  \  }}||j                  j                  dk(  rt        |      n|. }}}dd l}| |||j                         |j                  d}|j                  |      }	|	S c c}}w )Nr   r   )r%   	signature	constantsattrsoptionsrs   )itemsr0   rK   rk   jsonto_dict__dict__dumps)
r%   r   r   r   r   rs   rM   r   rl   serialized_objs
             r1   serialize_specialization_datar   ]  s    enetetevwWaWZ\aEOO$<$<$Gc%jURwIw99u}}C ZZ_N xs   1B c                   t        | j                        t        |      k(  sJ g }g }g }g }g }g }t        | j                  j                         |      D ]&  \  \  }}	}
|	j                  t
        j                  j                  u r)|j                  |       |j                  d| d|        n-|j                  | d|        |j                  d| d|        |
j                  r|j                  |       |j                  |       |
j                  s|j                  d|z         |
j                  r |j                  d|
j                  z         |j                  d|d|
j                  rdnd	d
       ) dj                  ||z   D cg c]  }|dz   	 c}      }dj                  |D cg c]  }|dz   	 c}      }dj                  |D cg c]  }|dz   	 c}      }|j                  d       dj                  |      }dj                  |      }d|d|d|d|d|d}| j                  j                         D ci c];  \  }}|j                  t
        j                  j                  urd| |j                  = }}}t        |d<   t        |d<   t!        ||       |d   S c c}w c c}w c c}w c c}}w )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    'z': z	=default_zcompute_spec_key(%s)z"%s"zmangle_type(, TrueFalse)rL   z**excess_kwargszdef dynamic_func(z):
    return {z}, (z), (z), excess_kwargsdefault_r   r   dynamic_func)r   
parameterszipr   r   rV   r   r   appendr   r   r   r   joinr   r   exec)sigkparams	func_argsdict_entriesconstexpr_valsnon_constexpr_valssignature_typesspecialisationsr%   spkpxrg   args_strdict_str	func_bodyr   func_namespaces                     r1   create_function_from_signaturer  h  s    s~~#g,... ILNOO 4 4 6@ k$R::**000T"!D6TF 34vYtf56!D6TF 34??!!$'%%d+''&&'='DE!!&&v0B0B'BC&&PRP[P[fahFh'ij!k$ ?_+LMaTMNIWW?1a$h?@N4F!Gq!d(!GH&' yy#Hyy&H(I~7IKI >>//1D%== 1 1 7 77 4&5==(N  %0N=!)9N%& 	N# .))5 N?!Gs   J/2J4J9	A J>r   r   
float8e4nvfp8e4nvfloat8e5fp8e5float8e4b15fp8e4b15float8_e4m3fn
float8e4b8fp8e4b8float8_e4m3fnuzfloat8_e5m2float8e5b16fp8e5b16float8_e5m2fnuzfloat16fp16bfloat16bf16float32r   float64fp64int8i8int16i16int32r   r   u8u16u32r   )int64uint8uint16uint32uint64c                       e Zd ZdZdZed        Zed        Zd Zedd       Z	d Z
d Zd	 Zd
 Zd Z	 	 ddZed        Zd Zd Zd Zd Z fdZd Z xZS )rA   Nr   c                    t        | d      r| j                  S t        | t              ryt        | t              rd| k  r| dk  ryd| k  r| dk  ryy	t        | t
              ry
| y t        dt        |        d|        )Nr   r   r   r   r   r   r   r   r   r   zUnsupported type z for )r   r   r"   r   r   r   r   r:   rv   s    r1   _key_ofzJITFunction._key_of  s    3 99T"S!33)#3##"2U#[/S	{%uEFFr2   c                    t        | d      r$| j                         t        j                  z  dk(  S t	        | t
              r| dz  dk(  | dk(  fS | d u fS )Nr   r   r   r   r   r   rA   divisibilityr"   r   r:  s    r1   _spec_ofzJITFunction._spec_of  sU    3
#<<>K$<$<<AAS!"HM3!8,,tr2   c                   ddl m} d }t        | j                  |      D ch c]%  \  }} ||      r|j                  s|j
                  ' }}}t        | j                  |      D ch c]B  \  }}t        |t              r-t        |t              s|dk(  r|j                  s|j
                  D }}} |t        |      t        |            S c c}}w c c}}w )Nr   )AttrsDescriptorc                    t        | d      r$| j                         t        j                  z  dk(  S t	        | t
              r| t        j                  z  dk(  S | yy)Nr   r   TFr=  )r  s    r1   is_divisible_by_16z3JITFunction._get_config.<locals>.is_divisible_by_16  sS    q*%zz|k&>&>>!CCAs#;333q88yr2   r   )
compilerrA  r  paramsr   r   r"   r   r   tuple)r.   re   rA  rC  r   rv   divisible_by_16
equal_to_1s           r1   _get_configzJITFunction._get_config  s    .	 "$++t4
s!#&u/F/F II
 
 "$++t4
s#s#JsD,AcQhW\WnWn II

 
 u_5uZ7HII


s   *C)ACc                    | yt        | t              r| S t        |       j                  d      d   }t        |   }|rdnd}||z   S )N*i8r   r   r   r   )r"   rk   r   r   )rs   r   	dtype_str	const_strs       r1   _type_ofzJITFunction._type_of  sP     ;S!JHNN3'+	.y9	$D#	9$$r2   c                D    t        t        | j                  |            }|S r4   )dictr  
constexprs)r.   constexpr_keyr   s      r1   _make_constantszJITFunction._make_constants  s    T__m<=	r2   c                   t         j                  y| j                  j                  }| j                  j                  }dj                  t        | j                  |d         D 	
cg c]  \  }	}
|	j                   d|
  c}
}	      }| d|j                   d|j                   d|j                   d|j                   d	| d
} G d d      }t        ||||d   ||      }||||j                  |j                  |j                  |j                  |j                  ||d
}t         j                  || ||||       d|i|dd      S c c}
}	w )NFr   r   z: z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=](r   c                      e Zd Zd Zy)/JITFunction._call_hook.<locals>.JitFunctionInfoc                .    || _         || _        || _        y r4   )rZ   r%   jit_function)r.   rZ   r%   rY  s       r1   r$   z8JITFunction._call_hook.<locals>.JitFunctionInfo.__init__'  s    $ 	$0!r2   N)rK   rU   r   r$   r   r2   r1   JitFunctionInforW  %  s    r2   rZ  r   )
r   devicer   	num_warpsnum_ctas
num_stagesenable_fp_fusionextern_libsconfigsspecialization_datars   )rs   r   fncompileis_manual_warmupalready_compiled)rA   
cache_hookrc  rK   rU   r  r  rE  r%   r\  r]  r^  r_  r   r`  )r.   rs   r   r[  r   r   ra  r%   rZ   r   r   	arg_reprsr   rZ  rb  r   s                   r1   
_call_hookzJITFunction._call_hook  s    !!)ww##IIc$++WZ[\W]F^_%**Rt4_`	{7#4#4"5[AQAQ@RR_`g`r`r_s  tG  HO  H`  H`  Ga  ac  dm  cn  no  p	 	 <D)YX_`aXbdkmpq #" **((!,, ' 8 8"..#6
 %%vtT2C*6*"" & 
 	
5 `s   %E
c                T    t        |      sJ | j                  j                  |       y)z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)callablepre_run_hooksr  )r.   hooks     r1   add_pre_run_hookzJITFunction.add_pre_run_hookE  s$    
 ~~!!$'r2   c                8   ddl m}m}m}m} || _        || _        || _        || _        t        | j                  | j                        | _        t        | j                        D cg c]  \  }}|j                  s| c}}| _        t        | j                        D cg c]  \  }}|j                  r| c}}| _        t        | j                        D cg c]!  \  }}|j                  r|j                  r |# c}}| _        yc c}}w c c}}w c c}}w )z1
        Precompute as much as possible.
        r   )CompiledKernelrd  	ASTSourcemake_backendN)rD  rp  rd  rq  rr  r  r   rE  binder	enumerater   constexpr_indicesnon_constexpr_indicesr   specialised_indices)r.   rp  rd  rq  rr  r   ps          r1   create_binderzJITFunction.create_binderM  s     	PO,"(4T^^T[[Q2;DKK2H![AANN!![6?6L%cFQTUTbTba%c"%dkk2$
1a1;N;NYZYgYgA$
  "\%c$
s*   &D
;D
 D5DD/D<Dc                  t         j                  j                         }t         j                  j                  |      }| j                  |d<   | j
                  D ]
  } ||i |  | j                  | j                           | j                  |i |\  }}	}
}}dj                  |	      t        |
|f      z   }| j                  |   j                  |d       }|t         j                  j                         }| j                  |      }|j                  |      }d|vsJ d       d|vsJ d       d|vsJ d       |D ]  }||j                  vst!        d	|z         t#        |j%                               }| j&                  D cg c]  }| j(                  |   j*                   }}|	d t-        |       }t/        ||      D ci c]  \  }}||d
k(  rdn| }}} | j0                  | f}t/        || j(                        D ci c];  \  }}|j2                  s|j4                  |d   j6                  v s||j*                  |= }}}|j9                         D ]  \  }}t;        |      st=        d| d       | j?                  ||||||      ry | jA                  | |||d         }| jC                  |||j                        }|| j                  |   |<   tE               }| jF                  j9                         D ]6  \  \  }} \  }!}"|"j                  ||      x}#|!k7  s$tI        d| d|! d|#        |s|J t;        |      r ||      }t-        |      }$|d   }%|$dkD  r|d   nd}&|$dkD  r|d   nd}' |jJ                  ||g| }( |jL                  |%|&|'||jN                  |jP                  |(| jR                  jT                  | jR                  jV                  g	|  |S c c}w c c}}w c c}}w )NdebugrL   device_typez=device_type option is deprecated; current target will be usedr[  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedr   rK  r   zCallable constexpr at index z is not supported)r   r   r`   z1 has changed since we compiled this kernel, from z to r   r   ),r   activeget_current_deviceget_current_streamr{  rl  rs  ry  r  rk   cacher@   get_current_targetrr  parse_optionsr   KeyErrorrF  valuesrv  rE  r%   r   r  rI  r   r   rH  r   rk  r   ri  rq  rd  objectr,   ri   launch_metadatar   functionpacked_metadatarp  launch_enter_hooklaunch_exit_hook))r.   r   r   re   r   r[  r}  rm  
bound_argssig_and_specr
  r  excess_kwargsrs   kernelr   backendr   rn   
bound_valsr   sigkeyssigvalsr   r   ra  rx  r   rv   r/   not_presentr%   globals_dict_idrC   globals_dictnewVal	grid_sizegrid_0grid_1grid_2r  s)                                            r1   r   zJITFunction.run]  sT   11311&9**w && 	"D$!&!	" ;; VaVZVaVacgVrkqVrS
L.2Dm ggl#c>=*I&JJF#''T2>]]557F''/G++F3G !.o0oo.6)e+ee)6)e+ee)" ]G,,,"#WZ[#[\\] z0023J 594N4NOqt{{1~**OGO"=CL1GJMgW^J_`AqF{U:`I`'t''47G "*dkk:Q>>QUUgaj.C.C%Cqy 	I 
 $//+ Y3C=#&B1#EV$WXXY sIvy'7S..y)WQZHC\\(( " F
 '-DJJvs# h<@<Q<Q<W<W<Y 	q8#T?%8c<&**4==#E"&tf,]^a]bbfgmfnoq q	q
 ###~ J'D	I!WF )AT!W1F )AT!W1F 5f44T6WDVWOFJJvvvvvH^H^`o**<<d>Q>Q>b>byewyc P`s    O#O(A O.c                   |r|ng }| _         j                  | _        || _        t	        j
                        | _        || _        t	        j                        d   | _        fd| _	        || _
        d | _        g | _        t        | j
                  j                  j                               D ]C  \  }}	|xr ||v xs |	j                   |v }
| j                  j#                  t%        ||	|
             E t'        j(                  t	        j*                              | _        | j,                  t/        j0                  d| j,                  t.        j2                        j5                         d  | _        t7        t8              | _        d | _        i | _        d | _         tB        jD                  jG                  dd      dk(  rdn|| _$        || _%        | j                  D cg c]  }|j                    c}| _&        | j                  D cg c]  }|jN                  s|jP                   c}| _)        g | _*        jV                  | _+        jX                  | _,        jZ                  | _-        j                  | _        y c c}w c c}w )Nr   c                0    j                   S  |       S r4   )rK   )rp   rc  r   s    r1   r   z&JITFunction.__init__.<locals>.<lambda>  s    T\bkk tAw r2   z^def\s+\w+\s*\(TRITON_DEBUG0r   T).rc  rU   rZ   versionrV   r   r   getsourcelinesstarting_line_numberr   r  rs  rE  rt  r  r  r%   r  r   textwrapdedent	getsourcer/   research	MULTILINEstartr   rP  r  hashr,   r  osenvironr@   r{  ra   	arg_namesr   r   rQ  rl  r   rK   __globals__)r.   rc  r  r   r{  ra   r   r  r   r   dnsrx  s    `    `     r1   r$   zJITFunction.__init__  s   1B-mm **2.!2$+$:$:2$>q$A!F	.!$..";";"B"B"DE 	;HAu#c.?)?)b5::QbCbCKK{1eS9:	;
 ??7#4#4R#8988BII&8$((BLLQWWYZ[ &
	 TV ZZ^^NC@CGTU
  +/++6Q!&&6*.++HQ155H   zz>>-- 7Hs   I<JJc                x   | j                   t        | j                  | j                  | j                        }|j                  | j                                |j                  t        | j                        z   | _         t        t        |j                  j                                     | _        | j                   S )N)r%   r*   r/   )r  r   rK   r  r/   rG   parser7   rk   r  rP  sortedr,   r   )r.   dependencies_finders     r1   rg   zJITFunction.cache_key  s     99"4$--QUQaQagkgogo"p%%djjl3+//#d6O6O2PPDI$(0C0T0T0Z0Z0\)]$^D!yyr2   c               \     | j                   t        t        j                  |      |dd|S )NTr   )r   rd   
MockTensor
wrap_dtype)r.   r   re   r   s       r1   r   zJITFunction.warmup  s*    txxZ5J5JD1QT$\U[\\r2   c           	        ddl m}m}m} dd l}dd lm} t        j                  j                         }|j                  |      }|d   | j                  j                  k7  r(t        d|d    d| j                  j                         |d   j                         D 	
ci c]4  \  }	}
|	|j                  j!                  |
      r|j                  |
      n|
6 }}	}
t#        |d   j                               } || |||j%                  |d	               }|d
   j                         D 	
ci c]#  \  }	}
|	t'        |
t(              rt+        |
      n|
% }}	}
|d   }	 ||d |      }|| j,                  |   |	<   |S c c}
}	w c c}
}	w )Nr   )rA  rd  rq  r   r%   zSpecialization data is for z but trying to preload for r   r   r   r   rs   )rD  rA  rd  rq  r   triton.languagelanguager   r~  r  loadsrc  rK   ri   r   r   is_dtyperP  	from_dictr"   r   rF  r  )r.   rb  rA  rd  rq  r   tlr[  deserialized_objrs   rM   r   r   r/   r   r  s                   r1   preloadzJITFunction.preload  s   BB$113::&9:F#tww'7'77-.>v.F-GGbcgcjcjcscsbtuw w /{;AAC
U BHH$5$5e$<%%G
	 
 )+6<<>?	iO4M4MN^_fNg4hi /y9??A
U E4!8ueC
 
 u%dG,"(

63

s   9E7)(E=c                   t        j                  | j                        }t        |t         j                        sJ t        |j                        dk(  sJ t        |j                  d   t         j                        sJ |S )Nr   r   )r<   r  r/   r"   Moduler   bodyFunctionDef)r.   trees     r1   r  zJITFunction.parse  s_    yy"$

+++499~"""$))A,888r2   c                    t        d      )Nz:Cannot call @triton.jit'd outside of the scope of a kernel)ri   )r.   re   r   s      r1   __call__zJITFunction.__call__   s    WXXr2   c                H    t         t        |   ||       |dk(  rd | _        y y )Nr/   )r#   rA   __setattr__r  )r.   r%   rM   r0   s      r1   r  zJITFunction.__setattr__#  s)    k4,T59 5=DI r2   c                P    d| j                    d| j                  j                   dS )NzJITFunction(:r   )rZ   rc  rK   r6   s    r1   __repr__zJITFunction.__repr__*  s&    dkk]!DGG,<,<+=Q??r2   F)NNNNNN)rK   rU   r   rg  r>  staticmethodr;  r?  rI  rN  rS  ri  rn  ry  r   r$   r   rg   r   r  r  r  r  r  r   r   s   @r1   rA   rA     s    JLG G&  J8 
% 
%/
b(
 Xt bf!%8(t  ]6Y@r2   rA   c                     y r4   r   )rc  s    r1   jitr  3  s    r2   r  r   r  r   r{  ra   c                     y r4   r   r  s         r1   r  r  8  s     r2   c               <    dfd}|  ||       S |S )a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    c           	         t        |       sJ t        j                  dd      dk(  rddlm}  ||       S t        |       S )NTRITON_INTERPRETr  r   r   )InterpretedFunction)r  r   r{  ra   r   r  )rk  r  getenvinterpreterr  rA   )rc  r  r{  r   r  ra   r   r  s     r1   	decoratorzjit.<locals>.decoratora  sS    ||99'-48&r**"3! / r2   rc  r   r   zJITFunction[T]r   )rc  r  r   r  r   r{  ra   r  s    `````` r1   r  r  E  s&    8   
~} r2   c                  6    e Zd ZdZed        Zd Zed        Zy)r  zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                l    | j                   j                  dk(  r| j                  dk(  rt        |       S | S )Nr   torch)r0   rK   rU   r  r:  s    r1   r  zMockTensor.wrap_dtype  s.    ==!!W,71Jc?"
r2   c                    || _         y r4   )r   )r.   r   s     r1   r$   zMockTensor.__init__  s	    
r2   c                      y)Nr   r   r   r2   r1   r   zMockTensor.data_ptr  s    r2   N)rK   rU   r   r   r  r  r$   r   r   r2   r1   r  r  }  s4    
  
  r2   r  c                  >    e Zd Zd Zd Zd Zd
dZd Zd Zd Z	d Z
y	)TensorWrapperc                    || _         || _        |j                  | _        |j                  | _        | j                  j                  | _        y r4   )r   basedatar[  shape)r.   r  r   s      r1   r$   zTensorWrapper.__init__  s5    
	II	kkYY__
r2   c                6    | j                   j                         S r4   )r  r   r6   s    r1   r   zTensorWrapper.data_ptr  s    yy!!##r2   c                8    | j                   j                  |      S r4   )r  stride)r.   r   s     r1   r  zTensorWrapper.stride  s    yy""r2   c                <    d| j                    d| j                   dS )NzTensorWrapper[rU  r   )r   r  r6   s    r1   __str__zTensorWrapper.__str__  s    

|2dii[::r2   c                6    | j                   j                         S r4   )r  element_sizer6   s    r1   r  zTensorWrapper.element_size  s    yy%%''r2   c                ^    t        | j                  j                         | j                        S r4   )r  r  cpur   r6   s    r1   r  zTensorWrapper.cpu  s    TYY]]_djj99r2   c                N    | j                   j                  |j                          y r4   )r  copy_)r.   others     r1   r  zTensorWrapper.copy_  s    		

#r2   c                `    t        | j                  j                  |      | j                        S r4   )r  r  tor   )r.   r[  s     r1   r  zTensorWrapper.to  s     TYY\\&14::>>r2   Nr   rk   )rK   rU   r   r$   r   r  r  r  r  r  r  r   r2   r1   r  r    s*    %$#;(:$?r2   r  c                    t        | t              r;|| j                  j                  k(  r| j                  S t        | j                  |      S t	        | d      rt        | |      S t        dt        |        d      )Nr   zCannot reinterpret a r   )r"   r  r  r   r   r   r:   )tensorr   s     r1   reinterpretr    sk    &-(FKK%%%;; !e44		$VU++/V~Q?@@r2   r  r  r  )r   Optional[Callable]r  r  r   Optional[Iterable[int]]r{  Optional[bool]ra   r  r   zCallable[[T], JITFunction[T]]r4   )rc  zOptional[T]r   r  r  r  r   r  r{  r  ra   r  r   z4Union[JITFunction[T], Callable[[T], JITFunction[T]]])4
__future__r   r   r<   r&   rV   rb   r  r  r  collectionsr   	functoolsr   typingr   r   r	   r
   r   r   r   r   r   r   runtime.driverr   typesr   rK   r   rO   r   NodeVisitorr   r   r   r   r   r   r   r   r  r   r   r  r   rA   r  r  r  r  r   r2   r1   <module>r     s   , 
    	 	  # % d d d # .3~../CLQ! Q!r+> +>\
 	2	Zgaj 	Z>*B
D)  :	
 Y ) y 7 : z v  v v D  U!" U#$ - 2 
(//1	2 &A$%q!&e@/!$ e@Z 
 
 
 #*.15 #	 	 (		
 /	 	 	 #	 
	 0 #*.15 #00 	0
 (0 /0 0 0 :0p (? ?>Ar2   