
    wgD                     $   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlZd dlmZ d dlmZ dZedk(  r  ee	      Zej'                  d
d       ej'                  ddeddd       ej'                  ddedd       ej'                  ddedd       ej'                  ddedd       ej'                  dd edd!       ej'                  d"d#ed$d%       ej'                  d&d'ed(d%       ej-                         Zej0                  rej0                  nej2                  Zej4                  rej4                  n ee      Z eej6                        Zej6                  j;                  d  eej<                               ej>                  jA                  ejB                  e      Z"ej>                  jG                  e"      Z$e"jJ                  jM                  e$        e'e$ej2                        Z(ejR                  jU                  d)      Z) e+e)      dk(  sJ  e, e-d* ej\                  jU                  d)                  Z.d+e
e   fd,Z/d-ej`                   d.ejb                   Z2 e/e.e2gz         Z3d/ Z4 e5e.      D  ci c]%  \  } }d0|v s|  e4|jU                  d0      d         ' c}} Z6e6jo                         D ci c]  \  }}|	|| c}}Z6 e5e.      D  ci c]  \  } }|  e4|       c}} Z8e8jo                         D ci c]  \  }}|	|| c}}Z8 e5e.      D  ci c]  \  } }| e8vs| |jU                  d0      d    ! c}} Z.d1js                  e8ju                         D cg c]
  } e|       c}      Z;e8jy                         D  cg c]  } e(jz                  |     d2e8|      c} Z>e>d3ej`                   d4ejb                   gz  Z>e6ju                         D ]  Z?e?d5v rJ d6e?         e6jo                         D  cg c]  \  } }|d7k(  s|  c}} Z@e6jo                         D  cg c]  \  } }|dk(  s|  c}} ZAej                  j                  e@eA8      ZDeAD ]  ZEe8j                  eEdi        ej                  j                  e(e8e.eD9      ZHej`                  ejb                  d:ZI ej                  eHeI;      ZKg Z=g ZLe.jy                         D ]#  ZEeEeAvse=e(jz                  eE   gz  Z=eLe.eE   gz  ZL%  ee.ju                         eD      ZMd<js                  ee3eMg      ZN e e j                  eKj                  d=               d>d? ZQi d@eNdAej2                  dB e+eQ      dCdDjs                   eReQddd>   eQddd>         D cg c]  \  }}dE| |  c}}      d+dDjs                   eRe=eL      D cg c]  \  }} e|       dF|  c}}      dGdDjs                  e.jy                         D  cg c]  }  ee.|           dFe(jz                  |     ! c}       dHdDjs                  e=D 	cg c]  }	dI|	 	 c}	      dJ e+e=      dKe>dLeKj                  j                  dMej`                  dNd<js                  e;e2g      dOe)d    dPe)d   dQe)d>   dRdZUdSD ]  ZV eeW      j<                  dTeV z  ZXej                  dUe3 d<eM dUeV       j                  dV      5 Z[e[j                    eeX      j                         j                  dWi eU       ddd        yyc c}} w c c}}w c c}} w c c}}w c c}} w c c}w c c} w c c}} w c c}} w c c}}w c c}}w c c} w c c}	w # 1 sw Y   xY w)X    N)ArgumentParser)Path)List)kernel_suffix)	ty_to_cppa  
Triton ahead-of-time compiler:

This program compiles the kernel with name `kernel-name` in the file at the
provided `path` into self-contained C source-code that embeds the `cubin`
data along with utilities to load, unload and launch the kernel.

signature is provided as a list of (optionally divisibility-hinted) types
or constexpr values, e.g.

`compile.py --kernel-name kernel --signature "*fp32:16, i32:16, 1024, i32" --out-name kernel /path/to/kernel.py`

will compile triton.JITFunction of name `kernel` inside the file `/path/to/kernel.py`.
Said kernel will be specialized such that argument 0, 1 are assumed to be multiple of 16,
and argument 2 is assumed to be a compile-time constant of value 1024, i.e. it won't be part of the generated prototype.

The resulting entry point will have signature

CUresult kernel_{specialization_suffix}(CUstream stream, unsigned gX, unsigned gY, unsigned gZ, float* arg0, int32_t arg1, int32_t arg2)

Different such specialized entry points can be combined using the `linker.py` script.

NOTE: when resolving the scope of /path/to/kernel.py, the file will be executed from within its parent directory with the python interpreter
used to run this `compile.py` script
__main__)descriptionpathzTPath to Python source containing desired kernel in its scope. File will be executed.)helpz--kernel-namez-n zName of the kernel to compileT)typedefaultr   requiredz--num-warpsz-w   z$Number of warps to launch the kernel)r   r   r   z--num-stagesz-ns   z/Number of stages (meta-parameter of the kernel)z
--out-namez-onz Out name for the compiled kernelz
--out-pathz-ozOut filenamez--signaturez-szSignature of the kernel)r   r   r   z--gridz-gzLaunch grid of the kernel,c                 $    | j                  d      S )N )strip)ss    Y/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/triton/tools/compile.py<lambda>r   F   s    1773<     	signaturec                     t        j                         }|j                  dj                  |       j	                                |j                         d d S )Nr      )hashlibsha256updatejoinencode	hexdigest)r   ms     r   hash_signaturer$   H   s?    NN	)$++-.{{}Ra  r   warpsxstagesc                 v    	 t        |       }|S # t        $ r Y nw xY w	 t        |       }|S # t        $ r Y y w xY w)N)int
ValueErrorfloat)r   rets     r   	constexprr,   P   sO    	a&CJ 			(CJ 		s    	, 	88:x=z
num_warps=znum_stages=)r      z#Only 1 and 16 are valid hints, got r0   )divisible_by_16
equal_to_1)fn	constantsr   attrs)	num_warps
num_stages)options_cubin   kernel_nametriton_kernel_namebin_sizebin_dataz, 0xr   full_signaturearg_pointers&num_argskernel_docstringsharedr6   	algo_infogridXgridYgridZ_placeholder)hczcompile..w )_binasciir   importlib.util	importlibsysargparser   pathlibr   typingr   tritontriton.compiler.code_generatorr   triton.backends.nvidia.driverr   desc__name__parseradd_argumentstrr(   
parse_argsargsout_namer=   out_pathr
   arg_pathinsertparentutilspec_from_file_locationstemspecmodule_from_specmodloaderexec_modulegetattrkernelgridsplitlenlistmapr   r$   r6   r7   meta_sigsig_hashr,   	enumeratehintsitemsr4   r    values	const_sigkeys	arg_names
doc_stringrM   r1   r2   compilerAttrsDescriptorr5   ir   	ASTSourcesrcoptscompileccinfo	arg_typessuffix	func_namehexlifyasmhex_zipmetadatarG   paramsext__file__template_pathwith_suffixopenfpwrite	read_textformat)
r   r   kvrM   r.   ynametyargs
   0000000000r   <module>r      s      
 #    8 34 z -F
s  u
CJi!%  '
t#qGmn
CN  P
e#tJlm
dt.Y
t#<U`de
$S7R]abD $t}}43C3CH $t}}4>H DIIHHHOOAs8??+,>>11(--JD
..
)
)$
/CKKC S$**+F99??3Dt9>> S/1E1Ec1JKLI!$s) !
 t~~&gdoo->?Hi8*45H 8A7KXtq!sVWxQ	!''#,q/**XE#kkm=daq}QT=E-6y-ABTQIaLBI"+//"3E$!Qq}AEI09)0D[1QZHZAGGCLO#[I)*:*:*<=Q#a&=>ICL>>CSTaV%%a()9Q<.9TJZ/0K?P2QRRJ \\^ GG|FB1#FF|G%*[[]>TQa2gq>O %81a!8JOO++OXb+cE !!Q !
//
#
#vi_d
#
eCtGDV^^C.FII^^ (J&**1-..I)A,'I( 9++-u5F(Hf56Ix

7 345a;Dyd.. 	CI 	DIIs4!9d14a4j7QRtq!A3qc{RS	
 	TYY#iYbJcdhdB9R=/4& 9de 	$))ajaoaoaq$r\]	)A,(?'@&BRBRSTBUAV%W$rs 				"BQse9"BC 	C	N 	J 	&//(( 	T^^ 	SXXy(34 	a 	a 	a  	!F$  GX--(3%0@@!!AhZq#"?@EEcJ 	GbHH;T-(224;;EfEF	G 	GGO l Y=BE[=T ?8, Sd$r"B	G 	Gs~   \<\<
]]#]
]],]9]1]]]$]$:]*]*]0]6$]<^:6^^	