
    wg                     t   d dl Z d dlmZ d dlZd dlZd dlZd dlmZ	 d dl
mZ ddlmZ d dlmZ ddlmZ dd	lmZ  G d
 d      Z G d d      Z ed       G d d             Zd Zd Zd Zd Zd Z ej:                  eej<                  g      Z ej:                  eej@                  g      Z! ej:                  eejD                  g      Z# G d d      Z$ G d d      Z%d Z&d Z'd Z( G d d       Z) G d! d"e)      Z* G d# d$e)      Z+d% Z,d& Z-d' Z.d( Z/ e%       Z0g d)Z1 G d* d+      Z2 G d, d-      Z3y).    N)Tuple)	dataclass   )InterpreterError)partial   )interpreter)irc                   *    e Zd Zd Zd Zd Zd Zd Zy)TensorHandlec                 .    || _         || _        i | _        y)a  
            data: numpy array
            dtype: triton type, either pointer_type or scalar_type.
            we don't store block_type here because the shape information is already availale in the data field
            attr: a dictionary of attributes
        N)datadtypeattr)selfr   r   s      _/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/triton/runtime/interpreter.py__init__zTensorHandle.__init__   s     	
	    c                 H    t        | j                  j                               S N)boolr   allr   s    r   __bool__zTensorHandle.__bool__   s    DIIMMO$$r   c                 h    | j                   }t        |d      r|j                  }t        |d      r|S )N
element_ty)r   hasattrr   )r   r   s     r   get_element_tyzTensorHandle.get_element_ty    s1    

e\*$$E e\*r   c                 ^    t        | j                  j                         | j                        S r   )r   r   copyr   r   s    r   clonezTensorHandle.clone&   s    DIINN,djj99r   c                 "    || j                   |<   y r   )r   )r   keyvalues      r   set_attrzTensorHandle.set_attr)   s    		#r   N)__name__
__module____qualname__r   r   r   r!   r%    r   r   r   r      s    	%:r   r   c                       e Zd Zd Zd Zy)BlockPointerHandlec                 X    || _         || _        || _        || _        || _        || _        y r   )baseshapestridesoffsetstensor_shapeorder)r   r-   r.   r/   r0   r1   r2   s          r   r   zBlockPointerHandle.__init__/   s-    	
(
r   c                 D   | j                   j                         }|j                  dz  }| j                  }t	        j
                  | j                   j                  | j                        }t	        j                  | j                  t              }t        t        |            D ]  }dgt        |      z  }||   ||<   | j                  |   j                  t	        j                  ||         z   j                  |      }	|||	z  | j                  |   j                  z  j                  t        j                         z   }||v st	        j"                  ||	| j$                  |   j                  k        } t'        || j                   j(                  j*                        }||fS )N   r   r   )r-   r   primitive_bitwidthr1   npbroadcast_tor   onesr   rangelenr0   arangereshaper/   astypeuint64logical_andr.   r   r   scalar)
r   boundary_checkdtype_ttn_bytesr1   ptrsmasksdim
bcast_dimsoffs
             r   materialize_pointersz'BlockPointerHandle.materialize_pointers7   sR   99++---2((tyy~~t/@/@A))6\*+ 	JCs<00J*3/JsO<<$))BIIl36G,HHQQR\]C7S=4<<+<+A+AAII"))TTDn$ucDJJsO4H4H.HI	J D$))//"8"89U{r   N)r&   r'   r(   r   rJ   r)   r   r   r+   r+   -   s    r   r+   T)frozenc                       e Zd ZU dZeed<   dZeed<   dZe	ed<   dZ
eed<   dZeed<   d	Ze	ed
<   dZee	   ed<   dZeed<   y)InterpreterOptionsNextern_libsFdebugarchTallow_fp8e4nvallow_fp8e4b15tf32default_dot_input_precision)rS   tf32x3ieeeallowed_dot_input_precisionsr   max_num_imprecise_acc_default)r&   r'   r(   rN   dict__annotations__rO   r   rP   strrQ   rR   rT   rW   r   rX   intr)   r   r   rM   rM   H   sY    KE4D#M4ND'--/I %*I)*!3*r   rM   c                    | t         j                  k(  rt         j                  S | t         j                  k(  rt         j                  S | t         j
                  k(  rt         j                  S | t         j                  k(  rt         j                  S | S r   )	r7   uint8int8uint16int16uint32int32r?   int64r5   s    r   _get_signed_np_dtypere   T   s[    ww		xx		xx		xxLr   c                 `   t        | t        j                        r#t        j                  t        j
                        S i t        j                  t        j                  t              t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                   t        j                  t        j                         t        j"                  t        j                  t        j"                        t        j
                  t        j                  t        j
                        t        j$                  t        j                  t        j                        t        j&                  t        j                  t        j                        t        j(                  t        j                  t        j                        t        j*                  t        j                  t        j                        t        j,                  t        j                  t        j                        t        j.                  t        j                  t        j                        i}t        | t        j0                        rVt        | j2                  t        j                        r#t        j                  t        j
                        S || j2                     S ||    S r   )
isinstancetlpointer_typer7   r   r?   int1r   float16float32float64r_   r^   ra   r`   rc   rb   rd   bfloat16float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15
block_typer   )tt_dtypenp_typess     r   _get_np_dtyperw   `   s~   (BOO,xx		""
$


BHHRZZ( 	

BHHRZZ( 	

BHHRZZ(	
 	"''" 	"((288$ 	"((288$ 			288BII& 	"((288$ 			288BII& 	"((288$ 			288BII& 	RXXbii(  	RXXbhh'!" 	*#$ 	rxx)%& 	rxx)'( 	*)H, (BMM*h))2??;88BII&&++,,Hr   c                    t        t        d|j                         }t        t        d|j                         }t        j                  | j	                         |      }||j                  dz
  z	  dz  }|j                  |j
                  z
  dz
  }|j                  |j
                  z
  dz
  }	|d|j
                  z  dz
  z  }
|j                  }|j                  }||j
                  z	  d|z  dz
  z  j                  t        j                        }|dk(  }t        j                  |      rt        j                  |t        j                        }t        |j
                        D ]  }|
|z	  dz  }|j
                  |z
  ||dk(  <   ! |
dk(  }d||   z
  ||<   ||z
  |||z  <   |
|   ||   z  d|j
                  z  dz
  z  |
|<   t        j                  dt        j                  ||z
  |z   d|	z  dz
              }|j                  |      }|j                  |      }|j                  |j                  kD  r|
|j
                  |j
                  z
  z	  d|j
                  z  dz
  z  }|t        j                  j                   k(  r*|
d|j
                  |j
                  z
  dz
  z  z  }||dkD  z   }|j                  |      }n>|
j                  |      |j
                  |j
                  z
  z  d|j
                  z  dz
  z  }|dk(  }t        j                  |      r||j
                  z	  d|z  dz
  z  j                  t        j                        }|dk7  }||z  }t        j                  |t        j                        }d|z
  ||   |z
  z
  ||<   ||   ||   z	  d|j
                  ||   z
  z  z  ||<   ||j                  dz
  z  ||j
                  z  z  |z  }|j#                  | j$                        S )Nuintr5   r   r   )getattrr7   r6   
frombuffertobytesfp_mantissa_widthexponent_biasr>   rc   any
zeros_liker:   maximumminimum_irROUNDING_MODERTNEr=   r.   )inputinput_dtypeoutput_dtyperounding_modeinput_uint_dtypeoutput_unint_dtype	input_binsigninput_exponent_widthoutput_exponent_widthsignificand
bias_inputbias_outputexponentsubnormal_indexbit_posi	bit_indexzero_significand_indexexponent_outputsign_outputsignificand_outputcut_offnon_zero_exponent_indexshiftoutputs                             r   _convert_floatr      s+   rT+*H*H)I#JK tL,K,K+L%MNemmo5EFI+881<=ED&99K<Y<YY\]](;;l>\>\\_``[%B%B BaGHK**J,,Kk;;;FZAZ^_@_`hhikiqiqrH!mO	vvo
 --	:{445 	HA%*d2I&1&C&Ca&GGIN#	H "-!1$%(@$@!=G+=U'/9:(3O(DP_H`(`+///14(6O$ jjBJJ:0E0SWX\qWquvVv$wxO%,,-?@O++01K%%(G(GG)k.K.KlNlNl.lm,000A57C--222!Q;+H+H<KiKi+ilm+m%noG!3w{!C/667IJ)001CD+==@]@]]_#$(F(F#F!"KM &*O	vvo
 +"?"??QJ^E^bcDcdllmomumuv"*a-),CCirxx8"#k/h6OR\6\!]o/A//RV[\kVl/l,0053IIJ/L?+l==AB<999;=OPF>>%++&&r   c                 ,    t        j                  |       S r   )matherfxs    r   _erfr      s    88A;r   c                 6    t        |       t        |      z  dz	  S )N@   )r\   )abs     r   
_umulhi_64r      s     FSVO""r   )otypesc                       e Zd Zed        Zy)ExtraFunctionsc                 d    t        j                  |j                  | j                  ||      |      S r   )rh   tensorcreate_fp_to_fphandle)r   dst_tyfp_downcast_rounding_builders       r   _convert_custom_typesz$ExtraFunctions._convert_custom_types   s(    yy11%,,H\]_effr   N)r&   r'   r(   staticmethodr   r)   r   r   r   r      s    g gr   r   c                   &   e Zd Zej                  j
                  ej                  j
                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  iZ
ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                   ej                  j                   ej                  j"                  ej                  j"                  ej                  j$                  ej                  j$                  ej                  j&                  ej                  j&                  ej                  j(                  ej                  j(                  ej                  j*                  ej                  j*                  ej                  j,                  ej                  j,                  i
ZddZd Zd Zd Zd Zd Zd Zd	 Zd
 Z d Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/d Z0d Z1d Z2d Z3d Z4d Z5d  Z6d! Z7d" Z8d# Z9d$ Z:d% Z;d& Z<d' Z=d( Z>d) Z?d* Z@d+ ZAd, ZBd- ZCd. ZDd/ ZEd0 ZFd1 ZGd2 ZHd3 ZId4 ZJd5 ZKd6 ZLd7 ZMd8 ZNd9 ZOd: ZPd; ZQd< ZRd= ZSd> ZTd? ZUd@ ZVdA ZWdB ZXdC ZYdD ZZdE Z[dF Z\dG Z]dH Z^dI Z_dJ Z`dK ZadL ZbdM ZcdN ZddO ZedP ZfdQ ZgdR ZhdS ZidT ZjdU ZkdV ZldW ZmdX ZndY ZodZ Zpd[ Zqd\ Zrd] Zsd^ Ztd_ Zud` Zvda Zwdb Zxdc Zydd Zzde Z{df Z|dg Z}dh Z~di Zdj Zdk Zdl Zdm Zdn Zdo Zdp Zdq Zdr Zds Zdt Zdu Zdv Zdw Zdx Zdy Zdz Zd{ Zd| Zd} Zd~ Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zy)InterpreterBuilderNc                 x    d | _         t               | _        i | _        t        j
                  | j                  d<   y )Nconvert_custom_types)rP   rM   optionscodegen_fnsr   r   r   s    r   r   zInterpreterBuilder.__init__   s2    	)+3A3W3W/0r   c                     || j                   d   k  st        d      || j                   d   k  st        d      || j                   d   k  st        d      |||f| _        y )Nr   zx >= grid_dim[0]r   zy >= grid_dim[1]r   zz >= grid_dim[2])grid_dim
ValueErrorgrid_idxr   r   yzs       r   set_grid_idxzInterpreterBuilder.set_grid_idx   sf    4==##/004==##/004==##/00Aq	r   c                     |||f| _         y r   )r   )r   nxnynzs       r   set_grid_dimzInterpreterBuilder.set_grid_dim   s    Rr   c                 "    t         j                  S r   )rh   rk   r   s    r   get_half_tyzInterpreterBuilder.get_half_ty       zzr   c                 "    t         j                  S r   )rh   rn   r   s    r   get_bf16_tyzInterpreterBuilder.get_bf16_ty      {{r   c                 "    t         j                  S r   )rh   rl   r   s    r   get_float_tyzInterpreterBuilder.get_float_ty  r   r   c                 "    t         j                  S r   )rh   rm   r   s    r   get_double_tyz InterpreterBuilder.get_double_ty	  r   r   c                 "    t         j                  S r   )rh   r_   r   s    r   get_int8_tyzInterpreterBuilder.get_int8_ty  s    wwr   c                 "    t         j                  S r   )rh   r^   r   s    r   get_uint8_tyzInterpreterBuilder.get_uint8_ty      xxr   c                 "    t         j                  S r   )rh   ra   r   s    r   get_int16_tyzInterpreterBuilder.get_int16_ty  r   r   c                 "    t         j                  S r   )rh   r`   r   s    r   get_uint16_tyz InterpreterBuilder.get_uint16_ty      yyr   c                 "    t         j                  S r   )rh   rc   r   s    r   get_int32_tyzInterpreterBuilder.get_int32_ty  r   r   c                 "    t         j                  S r   )rh   rb   r   s    r   get_uint32_tyz InterpreterBuilder.get_uint32_ty  r   r   c                 "    t         j                  S r   )rh   rd   r   s    r   get_int64_tyzInterpreterBuilder.get_int64_ty  r   r   c                 "    t         j                  S r   )rh   r?   r   s    r   get_uint64_tyz InterpreterBuilder.get_uint64_ty!  r   r   c                 "    t         j                  S r   )rh   rq   r   s    r   get_fp8e4nv_tyz!InterpreterBuilder.get_fp8e4nv_ty$      }}r   c                 "    t         j                  S r   )rh   rs   r   s    r   get_fp8e4b15_tyz"InterpreterBuilder.get_fp8e4b15_ty'      ~~r   c                 "    t         j                  S r   )rh   rr   r   s    r   get_fp8e4b8_tyz!InterpreterBuilder.get_fp8e4b8_ty*  r   r   c                 "    t         j                  S r   )rh   ro   r   s    r   get_fp8e5_tyzInterpreterBuilder.get_fp8e5_ty-  r   r   c                 "    t         j                  S r   )rh   rp   r   s    r   get_fp8e5b16_tyz"InterpreterBuilder.get_fp8e5b16_ty0  r   r   c                 .    t        j                  ||      S r   )rh   ri   )r   elt_ty
addr_spaces      r   
get_ptr_tyzInterpreterBuilder.get_ptr_ty3  s    vz22r   c                 .    t        j                  ||      S r   )rh   rt   )r   r   r.   s      r   get_block_tyzInterpreterBuilder.get_block_ty6  s    }}UE**r   c                 ~    t        t        j                  |gt        j                        t        j
                        S Nr5   )r   r7   arraybool_rh   rj   r   r$   s     r   get_int1zInterpreterBuilder.get_int19  s$    BHHeWBHH=rwwGGr   c                 ~    t        t        j                  |gt        j                        t        j                        S r   )r   r7   r   r^   rh   r   s     r   	get_uint8zInterpreterBuilder.get_uint8<  $    BHHeWBHH=rxxHHr   c                 ~    t        t        j                  |gt        j                        t        j                        S r   )r   r7   r   r_   rh   r   s     r   get_int8zInterpreterBuilder.get_int8?  s$    BHHeWBGG<bggFFr   c                 ~    t        t        j                  |gt        j                        t        j                        S r   )r   r7   r   r`   rh   r   s     r   
get_uint16zInterpreterBuilder.get_uint16B  $    BHHeWBII>		JJr   c                 ~    t        t        j                  |gt        j                        t        j                        S r   )r   r7   r   ra   rh   r   s     r   	get_int16zInterpreterBuilder.get_int16E  r  r   c                 ~    t        t        j                  |gt        j                        t        j                        S r   )r   r7   r   rb   rh   r   s     r   
get_uint32zInterpreterBuilder.get_uint32H  r  r   c                 ~    t        t        j                  |gt        j                        t        j                        S r   )r   r7   r   rc   rh   r   s     r   	get_int32zInterpreterBuilder.get_int32K  r  r   c                 ~    t        t        j                  |gt        j                        t        j                        S r   )r   r7   r   r?   rh   r   s     r   
get_uint64zInterpreterBuilder.get_uint64N  r  r   c                 ~    t        t        j                  |gt        j                        t        j                        S r   )r   r7   r   rd   rh   r   s     r   	get_int64zInterpreterBuilder.get_int64Q  r  r   c                 ~    t        t        j                  |gt        j                        t        j                        S r   )r   r7   r   rk   rh   r   s     r   get_fp16zInterpreterBuilder.get_fp16T  $    BHHeWBJJ?LLr   c                 ~    t        t        j                  |gt        j                        t        j                        S r   )r   r7   r   rl   rh   r   s     r   get_fp32zInterpreterBuilder.get_fp32W  r  r   c                 ~    t        t        j                  |gt        j                        t        j                        S r   )r   r7   r   rm   rh   r   s     r   get_fp64zInterpreterBuilder.get_fp64Z  r  r   c                 X    t        t        j                  dgt        |            |      S Nr   r5   )r   r7   r   rw   )r   types     r   get_null_valuez!InterpreterBuilder.get_null_value]  s!    BHHaSd0CDdKKr   c                     | j                   t        d      t        t        j                  | j                   |   gt        j
                        t        j
                        S )Nzgrid_idx is Noner5   )r   r   r   r7   r   rc   rh   r   axiss     r   create_get_program_idz(InterpreterBuilder.create_get_program_ida  sD    == /00BHHdmmD&9%:"((KRXXVVr   c                     t        t        j                  | j                  |   gt        j                        t
        j                        S r   )r   r7   r   r   rc   rh   r  s     r   create_get_num_programsz*InterpreterBuilder.create_get_num_programsf  s.    BHHdmmD&9%:"((KRXXVVr   c                     t        t        j                  |j                  t              t
        j                        }d }| j                  ||||||      S r   )r   r7   	ones_liker   r   rh   rj   create_masked_load)r   ptr_0_1is_volatilemaskothers          r   create_loadzInterpreterBuilder.create_loadj  sA    BLL>H&&sD%RMMr   c                     t        t        j                  |j                  t              t
        j                        }| j                  |||d d       S r   )r   r7   r#  r   r   rh   rj   create_masked_store)r   r%  valr&  r'  r)  s         r   create_storezInterpreterBuilder.create_storeo  s:    BLL>H''S$dCCr   c                    |j                         }t        |      }|+t        t        j                  |j
                  |      |      }t        j                  |j
                  |j
                  |j
                  |      }	t        |	|      S r   )r   rw   r   r7   r   r   _interpreterload)
r   rE   r)  r*  cache_modifiereviction_policyr(  rC   dtype_nprets
             r   r$  z%InterpreterBuilder.create_masked_loads  si    &&( *= tyy!I8TE		499ejj(KC**r   c                 l    t        j                  |j                  |j                  |j                        S r   )r1  storer   )r   rE   r$   r)  r3  r4  s         r   r-  z&InterpreterBuilder.create_masked_store{  s#    !!$))UZZCCr   c                    |j                   j                  }|j                  }|t        j                  k(  r|t        j                  k(  s&|t        j                  k(  rY|t        j                  k(  rFt        |j                  ||d       j                  t        |            }t        ||j                        S t        |j                  j                  t        |            |j                        S r   )r   rA   rh   rn   rl   r   r   viewrw   r   r>   )r   srcdst_typesrc_element_typedst_element_typer   s         r   	cast_implzInterpreterBuilder.cast_impl  s    99++#??+0@BJJ0N

*/?2;;/N!#((,<>NPTUZZ[hiq[rsDhoo66h0G H(//ZZr   c                 &    | j                  ||      S r   r?  r   r;  r<  s      r   <lambda>zInterpreterBuilder.<lambda>      $..h2O r   c                 &    | j                  ||      S r   rA  rB  s      r   rC  zInterpreterBuilder.<lambda>  rD  r   c                 &    | j                  ||      S r   rA  rB  s      r   rC  zInterpreterBuilder.<lambda>  rD  r   c                 &    | j                  ||      S r   rA  rB  s      r   rC  zInterpreterBuilder.<lambda>  rD  r   c                 &    | j                  ||      S r   rA  rB  s      r   rC  zInterpreterBuilder.<lambda>  s    sH0M r   c                 &    | j                  ||      S r   rA  rB  s      r   rC  zInterpreterBuilder.<lambda>  rD  r   c                 &    | j                  ||      S r   rA  )r   r;  r<  	is_signeds       r   rC  zInterpreterBuilder.<lambda>  s    T^^CQY=Z r   c                     |j                   j                  }|j                  }t        |j                  |||      j	                  t        |            }t        ||j                        S r   )r   rA   r   r   r:  rw   r   )r   r;  r<  r   r=  r>  r   s          r   r   z"InterpreterBuilder.create_fp_to_fp  sU    99++#??chh(8:JMZ__`mnv`wxD(//22r   c                 r    t        |j                  j                  t        |            |j                        S r   )r   r   r:  rw   rA   rB  s      r   create_bitcastz!InterpreterBuilder.create_bitcast  s%    CHHMM-*ABHOOTTr   c                 x    t         ||j                  |j                        |j                  j                        S r   r   r   r   rA   )r   lhsrhsops       r   	binary_opzInterpreterBuilder.binary_op  s(    Bsxx2CII4D4DEEr   c                 D    | j                  ||t        j                        S r   rT  r7   addr   rQ  rR  s      r   rC  zInterpreterBuilder.<lambda>  s    S"&&)I r   c                 D    | j                  ||t        j                        S r   rT  r7   multiplyrX  s      r   rC  zInterpreterBuilder.<lambda>      S"++)N r   c                 D    | j                  ||t        j                        S r   rT  r7   dividerX  s      r   rC  zInterpreterBuilder.<lambda>  s    S")))L r   c                 D    | j                  ||t        j                        S r   )rT  r7   	remainderrX  s      r   rC  zInterpreterBuilder.<lambda>  s    S",,)O r   c                 D    | j                  ||t        j                        S r   rT  r7   subtractrX  s      r   rC  zInterpreterBuilder.<lambda>  r\  r   c                 D    | j                  ||t        j                        S r   rZ  rX  s      r   rC  zInterpreterBuilder.<lambda>      sC(M r   c                 D    | j                  ||t        j                        S r   r^  rX  s      r   rC  zInterpreterBuilder.<lambda>  s    S"))1T r   c                 &    | j                  ||      S r   create_idivrX  s      r   rC  zInterpreterBuilder.<lambda>      )9)9#s)C r   c                 &    | j                  ||      S r   ri  rX  s      r   rC  zInterpreterBuilder.<lambda>  rk  r   c                 D    | j                  ||t        j                        S r   rT  r7   fmodrX  s      r   rC  zInterpreterBuilder.<lambda>      S"'')J r   c                 D    | j                  ||t        j                        S r   rn  rX  s      r   rC  zInterpreterBuilder.<lambda>  rp  r   c                 D    | j                  ||t        j                        S r   rV  rX  s      r   rC  zInterpreterBuilder.<lambda>  s    sC(H r   c                 D    | j                  ||t        j                        S r   rc  rX  s      r   rC  zInterpreterBuilder.<lambda>  rf  r   c                 D    | j                  ||t        j                        S r   )rT  r7   
left_shiftrX  s      r   rC  zInterpreterBuilder.<lambda>  s    sC(O r   c                 D    | j                  ||t        j                        S r   )rT  r7   right_shiftrX  s      r   rC  zInterpreterBuilder.<lambda>  s    S"..)Q r   c                 D    | j                  ||t        j                        S r   rT  r7   r   rX  s      r   rC  zInterpreterBuilder.<lambda>      $..c2::*N r   c                 D    | j                  ||t        j                        S r   ry  rX  s      r   rC  zInterpreterBuilder.<lambda>  rz  r   c                 D    | j                  ||t        j                        S r   ry  rX  s      r   rC  zInterpreterBuilder.<lambda>      T^^Cbjj-Q r   c                 D    | j                  ||t        j                        S r   ry  rX  s      r   rC  zInterpreterBuilder.<lambda>      DNN3RZZ,P r   c                 D    | j                  ||t        j                        S r   rT  r7   r   rX  s      r   rC  zInterpreterBuilder.<lambda>  rz  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  rz  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r}  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   rT  r7   
less_equalrX  s      r   rC  zInterpreterBuilder.<lambda>      DNN3R]],S r   c                 D    | j                  ||t        j                        S r   rT  r7   lessrX  s      r   rC  zInterpreterBuilder.<lambda>      DNN3RWW,M r   c                 D    | j                  ||t        j                        S r   rT  r7   greater_equalrX  s      r   rC  zInterpreterBuilder.<lambda>      DNN3REUEU,V r   c                 D    | j                  ||t        j                        S r   rT  r7   greaterrX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   rT  r7   equalrX  s      r   rC  zInterpreterBuilder.<lambda>  s    4>>#sBHH+M r   c                 D    | j                  ||t        j                        S r   rT  r7   	not_equalrX  s      r   rC  zInterpreterBuilder.<lambda>  s    4>>#sBLL+Q r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>      DNN3RXX,N r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>      DNN3R\\,R r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   r  rX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   )rT  r7   bitwise_andrX  s      r   rC  zInterpreterBuilder.<lambda>      sC(P r   c                 D    | j                  ||t        j                        S r   )rT  r7   bitwise_xorrX  s      r   rC  zInterpreterBuilder.<lambda>  r  r   c                 D    | j                  ||t        j                        S r   )rT  r7   
bitwise_orrX  s      r   rC  zInterpreterBuilder.<lambda>  s    t~~c3'N r   c                     t        |j                  t        j                  |j                  |j                        z
  |j                  z  |j                  j
                        S r   )r   r   r7   ro  r   rA   rX  s      r   rj  zInterpreterBuilder.create_idiv  sC     SXX#(((CCPRUR[R[RbRbccr   c                 @   t        |j                  j                        }t        |j                  j                        }|j                  j                  |      |_        |j                  j                  |      |_        | j	                  ||t
        j                        S r   )re   r   r   r>   rT  r7   rw  )r   rQ  rR  	lhs_dtype	rhs_dtypes        r   create_ashrzInterpreterBuilder.create_ashr  se    (8	(8	88??9-88??9-~~c377r   c                 R   |j                   j                  }|t        j                  k(  s|t        j                  k(  r>t        t        |j                   |j                         |j                  j                        S t        t        d|j                  dz  dz         }|j                   j                  |      }|j                   j                  |      }t        j                  ||      |j                  dz  z	  }t        |j                  |      |j                  j                        S )Nry   r4   r   )r   r   r7   rd   r?   r   np_umulhi_u64rA   rz   itemsizer>   r[  )r   rQ  rR  r   compute_dtypelhs_datarhs_dataret_datas           r   create_umulhiz InterpreterBuilder.create_umulhi  s    BHH 2chh A399CSCSTT#B$u~~/AA/E.F(GHMxx}5Hxx}5H{{8X65>>A;MNH 6		8H8HIIr   c                     t         ||j                  |j                  |j                        |j                  j                        S r   rP  )r   rQ  rR  r*  rS  s        r   
ternary_opzInterpreterBuilder.ternary_op  s.    Bsxx5::>@R@RSSr   c                 F    | j                  |||t        j                        S r   )r  r7   clip)r   arglohipropagate_nanss        r   rC  zInterpreterBuilder.<lambda>  s    doocSUWY[][b[b>c r   c                 F    | j                  |||t        j                        S r   )r  r7   where)r   condrQ  rR  s       r   rC  zInterpreterBuilder.<lambda>  s    sCQSQYQY1Z r   c                     t        |j                  |j                  z  |j                  z   |j                  j                        S r   rP  r   s       r   
create_fmazInterpreterBuilder.create_fma  s,    AFFQVVOaff4aggnnEEr   c                 b    t         ||j                        |j                  j                        S r   rP  )r   r  rS  s      r   unary_opzInterpreterBuilder.unary_op  s!    BsxxL#))*:*:;;r   c                 2   |j                   }|j                  dz
  }t        t        d|j                         }|j                  j                  |      }d|z  dz
  }||z  j                  t        |            }t        ||j                   j                        S )Nr   ry   )	r   r6   rz   r7   r   r:  rw   r   rA   )r   r  rC   mask_bitwidthnp_uint_dtyper   r)  r6  s           r   create_fabszInterpreterBuilder.create_fabs  s    99 33a7d8+F+F*G$HIxx}}]+]"a'd{  x!89C!1!122r   c                 B    | j                  |t        j                        S r   )r  r7   cosr   r  s     r   rC  zInterpreterBuilder.<lambda>      4==bff#= r   c                 B    | j                  |t        j                        S r   )r  r7   expr  s     r   rC  zInterpreterBuilder.<lambda>  r  r   c                 B    | j                  |t        j                        S r   )r  r7   exp2r  s     r   rC  zInterpreterBuilder.<lambda>      DMM#rww$? r   c                 B    | j                  |t        j                        S r   )r  r7   absr  s     r   rC  zInterpreterBuilder.<lambda>  s    DMM#rvv$> r   c                 B    | j                  |t        j                        S r   )r  r7   floorr  s     r   rC  zInterpreterBuilder.<lambda>  s    T]]3%A r   c                 B    | j                  |t        j                        S r   )r  r7   ceilr  s     r   rC  zInterpreterBuilder.<lambda>  r  r   c                 B    | j                  |t        j                        S r   )r  r7   logr  s     r   rC  zInterpreterBuilder.<lambda>  r  r   c                 B    | j                  |t        j                        S r   )r  r7   log2r  s     r   rC  zInterpreterBuilder.<lambda>  r  r   c                 B    | j                  |t        j                        S r   r  r7   sqrtr  s     r   rC  zInterpreterBuilder.<lambda>	  s    DMM#rww,G r   c                 B    | j                  |t        j                        S r   r  r  s     r   rC  zInterpreterBuilder.<lambda>
  r  r   c                 B    | j                  |t        j                        S r   )r  r7   sinr  s     r   rC  zInterpreterBuilder.<lambda>  r  r   c                     |j                   j                  t        j                  k(  rt	        |j                         nt        |j                         }t        ||j                  j                        S r   )r   r   r7   rl   np_erf_fp32np_erf_fp64r   rA   )r   r  r6  s      r   
create_erfzInterpreterBuilder.create_erf  sH    '*xx~~'Ck#((#UXU]U]I^C!1!122r   c                     t        dt        j                  |j                        z  |j                  j
                        S )Nr   )r   r7   r  r   r   rA   r  s     r   create_rsqrtzInterpreterBuilder.create_rsqrt  s+    A 113993C3CDDr   c                 t    t        |j                  j                  |      |j                  j                        S r   )r   r   r=   r   rA   )r   r  r.   allow_reorders       r   rC  zInterpreterBuilder.<lambda>  s*    \#((JZJZ[`Jacfclclcscs=t r   c                 ~    t        t        j                  |j                  |      |j                  j
                        S r   )r   r7   	transposer   r   rA   )r   r  perms      r   create_transzInterpreterBuilder.create_trans  s(    BLL48#)):J:JKKr   c                    |j                   }|j                   }|j                  j                  dk(  r|j                  j                         s3|j                  j                  dk(  r|j                  j                         rt	        ||j                  t
        j                  d       j                  t        j                        }t	        ||j                  t
        j                  d       j                  t        j                        }t        t        j                  |||j                   j                        |j                   z   |j                  j                        S )Nr4   r5   )r   r   r6   is_floatingr   rh   rk   r:  r7   r   matmulrA   )r   r   r   dinput_precisionmax_num_imprecise_acca_datab_datas           r   
create_dotzInterpreterBuilder.create_dot  s    GG&&!+0C0C0EGG&&!+0C0C0E#FAGGRZZFKKBJJWF#FAGGRZZFKKBJJWFBIIffAFFLLIAFFRTUT[T[TbTbccr   c                 ~    t        t        j                  ||t        j                        t        j                        S r   )r   r7   r<   rc   rh   )r   startstops      r   create_make_rangez$InterpreterBuilder.create_make_range#  s$    BIIeTBBHHMMr   c                     t        t        j                  |j                  |d|f      d   t        j
                        S )Nr   )binsr:   )r   r7   	histogramr   rh   rc   )r   r   r  s      r   create_histogramz#InterpreterBuilder.create_histogram&  s1    BLLaYOPQRTVT\T\]]r   c                     |j                         }|j                  }t        d|dz        }t        |j                  ||j                  j                  t        j                        z  z   |j                        S )Nr   r4   )	r   r6   maxr   r   r>   r7   r?   r   )r   r%  offsetrC   element_bitwidthelement_bytewidths         r   create_addptrz InterpreterBuilder.create_addptr+  se    %%'#66#3q#89CHH'86;;;M;Mbii;X'XXZ]ZcZcddr   c                    |j                  |      \  }}|j                         }	t        |	      }
|d }n|t        j                  j
                  k(  r,t        t        j                  |j                  |
      |	      }na|t        j                  j                  k(  r6t        t        j                  |j                  t        d      |
      |	      }nt        d|       | j                  ||||||      S )Nr5   nanzunsupported padding option )rJ   r   rw   r   PADDING_OPTIONPAD_ZEROr   r7   r   r   PAD_NAN	full_likefloatr   r$  )r   r%  rB   padding_optionr3  r4  r(  rE   rF   rC   r5  r*  s               r   create_tensor_pointer_loadz-InterpreterBuilder.create_tensor_pointer_load2  s    ..~>e&&( *!Es11::: tyy!I8TEs11999 diiuX!VX`aE:>:JKLL&&tUE>?\ghhr   c                 T    |j                  |      \  }}| j                  |||||      S r   )rJ   r-  )r   r%  r$   rB   r3  r4  rE   rF   s           r   create_tensor_pointer_storez.InterpreterBuilder.create_tensor_pointer_storeA  s/    ..~>e''eUNO\\r   c                 ~    t        t        j                  |j                  |      |j                  j
                        S r   )r   r7   expand_dimsr   r   rA   )r   r  r  s      r   create_expand_dimsz%InterpreterBuilder.create_expand_dimsE  s(    BNN388T:CII<L<LMMr   c                 ~    t        t        j                  |j                  |      |j                  j
                        S r   )r   r7   r8   r   r   rA   r   r  r.   s      r   create_broadcastz#InterpreterBuilder.create_broadcastH  s(    BOOCHHe<cii>N>NOOr   c                 |    t        |j                  j                  t        j                        |j
                        S r   r   r   r>   r7   r?   rA   r   r.  r   s      r   create_int_to_ptrz$InterpreterBuilder.create_int_to_ptrK  $    CHHOOBII6FFr   c                 |    t        |j                  j                  t        j                        |j
                        S r   r!  r"  s      r   create_ptr_to_intz$InterpreterBuilder.create_ptr_to_intN  r$  r   c                     t        t        j                  |j                  |j                  g      |j                  j
                        S r   )r   r7   concatenater   r   rA   rX  s      r   
create_catzInterpreterBuilder.create_catQ  s/    BNNCHHchh+?@#))BRBRSSr   c                     t        t        j                  |j                  |j                  gd      |j                  j
                        S )Nr  )r   r7   stackr   r   rA   rX  s      r   create_joinzInterpreterBuilder.create_joinT  s1    BHHchh%9CSYYEUEUVVr   c                     t        |j                  d   |j                  j                        t        |j                  d   |j                  j                        fS )N).r   ).r   rP  )r   r.  s     r   create_splitzInterpreterBuilder.create_splitX  sE    SXXf-syy/?/?@,sxxX^O_adajajaqaqBrssr   c           	         t        |j                  t        j                        rVt	        t        j                  ||j                  d   t        |j                              |j                  j                        S t	        t        j                  ||j                  t        |j                              |j                  j                        S r  )
rg   r   rh   rt   r   r7   fullr   rw   rA   r  s      r   create_splatzInterpreterBuilder.create_splat\  s    cii/sxx{-PSPYPYBZ []`]f]f]m]mnnsxx}SYY?W XZ]ZcZcZjZjkkr   c                     || j                   vrt        d|       | j                   |   }t        t        j                  |j
                  |j
                  |j
                  |      |j                  j                        S )Nunsupported semantic )ir_sem_to_interpreter_semr   r   r1  
atomic_casr   r   rA   )r   r%  cmpr.  semscopes         r   create_atomic_casz$InterpreterBuilder.create_atomic_casb  sk    d4444SE:;;,,S1L33CHHchhRUVX[XaXaXhXhiir   c           	      X   || j                   vrt        d|       || j                  vrt        d|       | j                   |   }| j                  |   }t        t	        j
                  ||j                  |j                  |j                  |      |j                  j                        S )Nzunsupported rmwOp r5  )	ir_rmw_op_to_interpreter_rmw_opr   r6  r   r1  
atomic_rmwr   r   rA   )r   rmwOpr%  r.  r)  r9  r:  s          r   create_atomic_rmwz$InterpreterBuilder.create_atomic_rmwh  s    <<<1%9::d4444SE:;;44U;,,S1L33E388SXXtyyZ]^`c`i`i`p`pqqr   c                     t        d      )Nz4extern_elementwise not supported in interpreter modeNotImplementedError)r   libNamelibPathsymbolargListretTypeisPures          r   create_extern_elementwisez,InterpreterBuilder.create_extern_elementwiseq  s    !"XYYr   c                     t        d      )Nz,inline_asm not supported in interpreter moderB  )r   	inlineAsmconstraintsvaluesr  rI  packs          r   create_inline_asmz$InterpreterBuilder.create_inline_asmt  s    !"PQQr   c                 *   d| j                   d    d| j                   d    d| j                   d    d}|r|d| z  }|rt        j                  dd	 i
       |D ]  }t        |d|j                   z           |rt        j                  d 
       y y )N(r   z, r   r   ) r   c                     d| dS )N0x02xr)   r   s    r   rC  z1InterpreterBuilder.create_print.<locals>.<lambda>}  s    b3L r   )	formatter)r   r7   set_printoptionsprintr   )r   prefixhexrN  msgr$   s         r   create_printzInterpreterBuilder.create_printw  s    $--"#2dmmA&6%7r$--:J9K1MQvh<C52H*IJ 	*E#!EJJ<(()	*$/ r   c                 ,    |sJ | d| d| d|        y )Nz in :r)   )r   	conditionmessagefileNamefuncNamelineNos         r   create_assertz InterpreterBuilder.create_assert  s&    HWIT(1XJaxHHyr   c                      y r   r)   r   s    r   create_barrierz!InterpreterBuilder.create_barrier  s    r   c                 f    |D cg c]  }|j                          }}t        ||||||      S c c}w r   )r!   r+   )	r   r-   r.   r/   r0   r1   r2   r  new_offsetss	            r   create_make_block_ptrz(InterpreterBuilder.create_make_block_ptr  s6    4;<&v||~<<!$w\SXYY =s   .c                    t        |j                        t        |      k7  rt        d      |j                  D cg c]  }|j                          }}t	        |j
                  |j                  |j                  ||j                  |j                        }t        t        |            D ]1  }|j                  |   xj                  ||   j                  z  c_        3 |S c c}w )Nz len(ptr.offsets) != len(offsets))r;   r0   r   r!   r+   r-   r.   r/   r1   r2   r:   r   )r   r%  r0   r  rj  r6  r   s          r   create_advancez!InterpreterBuilder.create_advance  s    s{{s7|+?@@47KK@&v||~@@ 399ckk;PSP`P`bebkbkls7|$ 	3AKKN71:??2	3
	 As   C c                     t        |      }d|j                  v r,t        t        j                  dd|      |j
                        S t        d|       )Nr\   r   r+  r5   zunsupported type )rw   namer   r7   r2  rA   	TypeError)r   r  np_types      r   get_all_ones_valuez%InterpreterBuilder.get_all_ones_value  sI    %GLL 2W =t{{KK/v677r   returnN)r&   r'   r(   r   MEM_SEMANTICACQUIREr1  RELEASERELAXEDACQUIRE_RELEASEr6  	ATOMIC_OPADDRMW_OPFADDMINUMINMAXUMAXANDORXORXCHGr=  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r
  r  r  r  r  r  r  r  r  r!  r+  r/  r$  r-  r?  create_si_to_fpcreate_ui_to_fpcreate_fp_to_sicreate_fp_to_uicreate_fp_extcreate_fp_trunccreate_int_castr   rN  rT  create_faddcreate_fmulcreate_fdivcreate_fremcreate_fsub
create_mulcreate_precise_divfcreate_sdivcreate_udivcreate_sremcreate_urem
create_add
create_sub
create_shlcreate_lshrcreate_minsicreate_minuicreate_minimumfcreate_minnumfcreate_maxsicreate_maxuicreate_maximumfcreate_maxnumfcreate_icmpSLEcreate_icmpSLTcreate_icmpSGEcreate_icmpSGTcreate_icmpULEcreate_icmpULTcreate_icmpUGEcreate_icmpUGTcreate_icmpEQcreate_icmpNEcreate_fcmpOLTcreate_fcmpOGTcreate_fcmpOLEcreate_fcmpOGEcreate_fcmpOEQcreate_fcmpONEcreate_fcmpULTcreate_fcmpUGTcreate_fcmpULEcreate_fcmpUGEcreate_fcmpUEQcreate_fcmpUNE
create_and
create_xor	create_orrj  r  r  r  create_clampfcreate_selectr  r  r  
create_cos
create_expcreate_exp2create_iabscreate_floorcreate_ceil
create_logcreate_log2create_precise_sqrtcreate_sqrt
create_sinr  r  create_reshaper  r   r  r  r  r  r  r  r  r#  r&  r)  r.  r0  r3  r;  r@  rJ  rP  r^  rf  rh  rk  rm  rr  r)   r   r   r   r      s     ,";";"C"C  ,";";"C"C  ,";";"C"C((,*C*C*S*S	! 	<..22L//44<..22L//44<..22L//44<..22,--00<..22L//44'#X"%
3+HIGKIKIKIMMMLW
WN
D+D[ POOOOOOOMMOOZO3UF JKNKLKOKNKMJTCKCKJKJKHJMJOJQKNLNLQOPNNLNLQOPNSNMNVNPNSNMNVNPNMMQMMNPNSNVNNNRNMNPNSNVNNNRNPJPJNId8	JT dMZMF<3 >J=J?K>KAL?K=J?KG?K=J3E uNLdN^
ei]NPGGTWtljrZR
0IZ
8r   r   c                 0    |dfd
}t        | ||       y )N)memberc           
      v     | |i |j                         D ci c]  \  }}|dk7  r|| c}}diS c c}}w )Nr   )items)r  argskwargskvbuilders        r   rC  z_patch_attr.<locals>.<lambda>  sZ     :hMS\\^AUEIQDEO BCA AU:h `g:h AUs   5)setattr)objro  r  r  
new_members      ` r   _patch_attrr    s    &, iJ Cz"r   c                     t        j                  |       D ]3  \  }}t        j                  j	                  |      s&t        | |||       5 y r   )inspect
getmembersrh   core
is_builtinr  )pkgr  ro  r  s       r   _patch_builtinr    s@    **3/ 4f77f%T6734r   c                 v    d d }d | _         fd| _        d | _        d | _        t	        |      | _        y )Nc                 f    | j                   j                  }|j                  dk(  rt        |      S dS )Nr   T)r   r   sizer   )r   r   s     r   	_get_boolz%_patch_lang_tensor.<locals>._get_bool  s,    {{ "YY!^tDz55r   c                     t         j                  j                  t        t	        j
                  | j                  j                        | j                  j                        | j                  j                        S r   )
rh   r  r   r   r7   r  r   r   r   rA   r   s    r   _get_transposez*_patch_lang_tensor.<locals>._get_transpose  sH    ww~~l2<<8H8H+I4;;K\K\]_c_i_i_p_pqqr   c                 @    t        | j                  j                        S r   )r\   r   r   r   s    r   rC  z$_patch_lang_tensor.<locals>.<lambda>  s    C(8(8$9 r   c                      |       S r   r)   )r   r  s    r   rC  z$_patch_lang_tensor.<locals>.<lambda>  s    9T? r   c                 @    t        | j                  j                        S r   )reprr   r   r   s    r   rC  z$_patch_lang_tensor.<locals>.<lambda>  s    4(8(8#9 r   c                 @    t        | j                  j                        S r   )r[   r   r   r   s    r   rC  z$_patch_lang_tensor.<locals>.<lambda>  s    #dkk&6&6"7 r   )	__index__r   __repr____str__propertyT)r   r  r  s     @r   _patch_lang_tensorr    s9    6r :F2FO9FO7FN'FHr   c                   0    e Zd Zd Zd Zd Zd Zd Zd Zy)ReduceScanOpIneterfacec                      || _         || _        y r   )r  
combine_fn)r   r  r  s      r   r   zReduceScanOpIneterface.__init__  s    	$r   c                 H    | |t        |      k\  rt        d| d|       y y )Nzaxis z out of bounds for shape )r;   r   )r   r.   r  s      r   
check_axisz!ReduceScanOpIneterface.check_axis  s4    E
 2uTF*CE7KLL !3r   c                     |D ]c  }t        |t        j                  j                        st	        dt        |             | j                  |j                  | j                         e y )Nzinput must be a tensor, got )	rg   rh   r  r   r   r  r  r.   r  )r   r   r  s      r   check_tensorz#ReduceScanOpIneterface.check_tensor  sP     	2Cc277>>2 #?S	{!KLLOOCIItyy1	2r   c                 "   t        |d      r-|j                  r!t        j                  ||j                        }n#t	        j
                  |gt        |            }|}t        j                  j                  t        ||j                        |      S )Nr.   r5   )r   r.   rh   rt   r7   r   rw   r  r   r   rA   )r   r6  r   ret_types       r   	to_tensorz ReduceScanOpIneterface.to_tensor  sb    3 SYY}}UCII6H((C5e(<=CHww~~l3=xHHr   c                 l    t        |t              s|f}| j                  |       | j                  |      S r   )rg   tupler  
apply_implr   r   s     r   applyzReduceScanOpIneterface.apply  s0    %'IE% u%%r   c                     t        d      )Nzapply_impl not implementedrB  r  s     r   r  z!ReduceScanOpIneterface.apply_impl  s    !">??r   N)	r&   r'   r(   r   r  r  r  r  r  r)   r   r   r  r    s#    %M2I&@r   r  c                   >     e Zd Z fdZd Zd ZddZd Zd Z xZ	S )	ReduceOpsc                 4    t         |   ||       || _        y r   )superr   	keep_dims)r   r  r  r  	__class__s       r   r   zReduceOps.__init__  s    z*"r   c                     g }|D ]e  }||j                  |       d}|j                  | j                  |j                  j                  j	                         |j
                               g t        |      |fS )Nr   )appendr  r   r   flattenr   r  )r   r   r  r6  r   s        r   unravelzReduceOps.unravel  sn     	SD

4 

4>>$++*:*:*B*B*DdjjQR	S Sz4r   c                 $     j                   } j                   j                         \  }g }g }d   j                  j                  j                  }|d| ||dz   d  z   }D ]k  }|j                  |j                  j                         |j                  t        j                  ||j                  j                  j                               m t        |d   j                        D ]Y  }	t        j                  |	|      d| |dz   d  z   t         fdt        |      D              }
|   dk(  rGt        t        |            D ]/  }|
|   j                  j                  j                         ||   <   1 t         fdt        |      D              }  j                   j"                  g ||
 }t%        |t              s|fn|}t        t        |            D ][  }t%        ||   t&        j(                  j*                        r'||   j                  j                  j                         n||   ||   <   ] \ g }t        |      D ]  \  }	} j,                  rI|t        j.                  ||      }nBt        t        |            D ]  }t        j.                  |d      } n||j                         }|j                   j1                  ||	   j                                t        |      dk(  r|d   S t        |      S )Nr   r   r5   c              3   h   K   | ])  \  }}j                  |   |   j                         + y wr   r  r   ).0iir  r   input_indexr   s      r   	<genexpr>z+ReduceOps.generic_reduce.<locals>.<genexpr>  s/     sTYTVXYq~uRy Os   /2c              3   h   K   | ])  \  }}j                  |   |   j                         + y wr   r  )r  oior   output_indexr   s      r   r  z+ReduceOps.generic_reduce.<locals>.<genexpr>	  s/     !wW\WY[\$..<%)//"R!wr  )r  r
  r   r   r.   r  r7   zerosr   r:   r  unravel_indexr  	enumerater;   itemr  fnrg   rh   r  r   r  r  r  )r   r   original_axisr  
input_dataoutput_datainput_shapeoutput_shaper  r   input_tuplej	acc_tuplecombine_fn_retr6  r   _r  r  s   ``               @@r   generic_reducezReduceOps.generic_reduce  s   		ll5$))4t
Ahoo**00"1T*[-CC 	TCcjjoo.rxxCJJOO<Q<QRS	T z!}))* 	HA**1k:K&q.TAXY1GGLs]fgq]rssK4 A%s;/0 UA3>q>3H3H3M3M3R3R3TKN<0U "!w`iju`v!ww	!3!3!3!MY!M!M6@QV6W^.]k	s;/0 HAV`!!bggnnW69Q<3F3F3K3K3P3P3R;DQ<  N<0H	H"  - 	=GAt~~ ,>>$5D"3{#34 7!~~dA67 &yy{JJt~~dE!HNN;<	= SQs1v6E#J6r   c                    t        |t              r|d   n|}d }d }|rM| j                   ||j                  j                  | j
                  | j                        |j                        }|rQ| j                   ||j                  j                  | j
                  | j                        t        j                        }||||fS ||S ||S t        d      )Nr   r  keepdimsz-val_reduce_op and idx_reduce_op are both None)rg   r  r  r   r   r  r  r   rh   rc   r   )r   r   val_reduce_opidx_reduce_opr.  idxs         r   min_maxzReduceOps.min_max  s    &ue4a%..u||/@/@tyy[_[i[i!jlqlwlwxC..u||/@/@tyy[_[i[i!jlnltltuC?s8O_J_JLMMr   c                     | j                  t        j                  |j                  j                  | j
                  | j                        |j                        S )Nr(  )r  r7   sumr   r   r  r  r   r  s     r   r/  zReduceOps.sum1  s<    ~~bffU\\%6%6TYYQUQ_Q_`bgbmbmnnr   c                 2   | j                   t        j                  j                  k(  r3| j	                  |d   t
        j                  t
        j                        S | j                   t        j                  j                  k(  r3| j	                  |d   t
        j                  t
        j                        S | j                   t        j                  j                  k(  r%| j	                  |d   t
        j                  d       S | j                   t        j                  j                  k(  r%| j	                  |d   t
        j                  d       S | j                   t        j                  j                  k(  r| j                  |d         S | j                  |      S )Nr   )r*  r+  )r  rh   standard_argmin_combine_tie_break_leftr-  r7   minargmin_argmax_combine_tie_break_leftr
  argmax_elementwise_max_elementwise_min_sum_combiner/  r&  r  s     r   r  zReduceOps.apply_impl4  s   ??bkkHHH<<abii<XX__ J JJ<<abii<XX__ < <<<<ad<SS__ < <<<<ad<SS__ 8 8888E!H%% &&u--r   r   )
r&   r'   r(   r   r
  r&  r-  r/  r  __classcell__r  s   @r   r  r    s$    # )7VN$o.r   r  c                   6     e Zd Z fdZd Zd Zd Zd Z xZS )ScanOpsc                 4    t         |   ||       || _        y r   )r  r   reverse)r   r  r  r?  r  s       r   r   zScanOps.__init__F  s    z*r   c                     | j                  t        j                  |j                  j                  | j
                        |j                        gS Nr,  r5   )r  r7   cumsumr   r   r  r   r  s     r   rB  zScanOps.cumsumJ  s8    ryy):):KSXS^S^_``r   c                     | j                  t        j                  |j                  j                  | j
                        |j                        gS rA  )r  r7   cumprodr   r   r  r   r  s     r   rD  zScanOps.cumprodM  s8    rzz%,,*;*;$))LTYT_T_`aar   c           	          g }g }d   j                   j                  j                  }D ]k  }|j                  |j                   j                         |j                  t	        j
                  ||j                   j                  j                               m t        |d   j                        D ]|  }t	        j                  ||      t         fdt        |      D              } j                     dk(  rGt        t        |            D ]/  }||   j                   j                  j                         ||   <   1 t         fdt        t                    D              t         fdt        |      D              }	  j                  j                   g |	| }
t#        |
t              s|
fn|
}	t        t        |            D ][  }t#        |	|   t$        j&                  j(                        r'|	|   j                   j                  j                         n|	|   ||   <   ]  g }t        |      D ]3  \  }}|j                   j+                  ||   j                               5 |S )Nr   r5   c              3   h   K   | ])  \  }}j                  |   |   j                         + y wr   r  )r  r  r  indexr   r   s      r   r  z'ScanOps.generic_scan.<locals>.<genexpr>[  s,     fur1%%)//Bfr  c              3   V   K   | ]   }|j                   k(  r|   d z
  n|    " yw)r   Nr,  )r  r   rG  r   s     r   r  z'ScanOps.generic_scan.<locals>.<genexpr>a  s-     "kTU1		>58a<uQx#O"ks   &)c              3   h   K   | ])  \  }}j                  |   |   j                         + y wr   r  )r  r  r  r   
prev_indexr   s      r   r  z'ScanOps.generic_scan.<locals>.<genexpr>b  s/     !uUZUWYZ$..:b	"P!ur  )r   r   r.   r  r7   r  r   r:   r  r  r  r  r  r;   r  r  r  rg   rh   r  r   r  )r   r   r  r  r.   r  r   r   r"  r#  r$  r6  rG  rJ  s   ``          @@r   generic_scanzScanOps.generic_scanP  s!   
a$$** 	MCcjjoo.rxxSZZ__5J5JKL	M z!}))* 	HA$$Q.EfPYZdPeffDTYY1$s;/0 GA,0GNN,?,?,D,D,FKN5)G #"kY^_bch_iYj"kk
!!u^ghs^t!uu	!3!3!3!FY!F!F6@QV6W^.]k	s;/0 HAOY!!bggnnP6IaL,?,?,D,D,I,I,K;DQ<  N5)H	H"  - 	=GAtJJt~~dE!HNN;<	=
r   c           	         g }| j                   rf|D ]`  }|j                  | j                  t        j                  |j
                  j                  | j                        |j                               b n|}| j                  t        j                  j                  k(  r| j                  |d         }nM| j                  t        j                  j                  k(  r| j                  |d         }n| j!                  |      }| j                   rK|D ]F  }t        j                  |j
                  j                  | j                        |j
                  _        H t#        |      dk(  xr |d   xs t%        |      S )Nr,  r   r   )r?  r  r  r7   flipr   r   r  r   r  rh   r1  r9  rB  _prod_combinerD  rK  r;   r  )r   r   	new_inputr  r6  s        r   r  zScanOps.apply_impln  s   	<< f  

dii0XZ]ZcZc!def I??bkk666++il+C__ 9 99,,y|,C ##I.C<< K"$''#**//		"J

K3x1}'Q55:5r   )	r&   r'   r(   r   rB  rD  rK  r  r:  r;  s   @r   r=  r=  D  s    ab<6r   r=  c                      dd} dd}| t         _        |t         _        | t         j                  _        |t         j                  _        y )Nc                 :    t        |||      j                  |       S r   )r  r  )r   r  r  r  r  s        r   _new_reducez'_patch_reduce_scan.<locals>._new_reduce  s    z95;;EBBr   c                 :    t        |||      j                  |       S r   )r=  r  )r   r  r  r?  r  s        r   	_new_scanz%_patch_reduce_scan.<locals>._new_scan  s    tZ177>>r   )F)rh   reduceassociative_scanr  )rR  rT  s     r   _patch_reduce_scanrW    s5    C? BI#B BGGN(BGGr   c                    d }d	d}d
d}d }|| _         || _        || _        t        | _        || j
                  _        t        |d      | _        t        |d      | _	        t        |d      | _
        t                y )Nc                 >   | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j	                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   d	k(  r|j                         S | j                   d
k(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j!                         S | j                   dk(  r|j#                         S t%        d|  d      )Nvoidrj   r_   r^   ra   r`   rc   rb   rd   r?   fp8e5fp8e4nvfp8e4b15fp16bf16fp32fp64zfail to convert z to ir type)ro  get_void_tyget_int1_tyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r  s     r   
_new_to_irz$_patch_lang_core.<locals>._new_to_ir  s   99&&((YY& &&((YY& &&((YY'!''))YY'!''))YY("((**YY'!''))YY("((**YY'!''))YY("((**YY'!''))YY)#))++YY*$**,,YY& &&((YY& &&((YY& ''))YY& ((**+D6=>>r   c                 :    |d}|d| }}n| |}}t        |||      S )Nr   r   )r:   )arg1arg2stepr  r  ends         r   
_new_rangez$_patch_lang_core.<locals>._new_range  s2    <D<D3Et3EUC&&r   c                     | sJ |       y r   r)   )r  r]  s     r   _new_static_assertz,_patch_lang_core.<locals>._new_static_assert  s    Str   c                    t        | t        j                        s| S t        |t        t        f      s|gn|}|D cg c]*  }t        |t        j
                        r|j                  n|, }}t        |      t        dt        | j                              k7  rt        d|       | j                  j                  ||       | S c c}w )Nr   z$len(values) != len(input.shape) for )rg   rh   r   listr  	constexprr$   r;   r
  r.   r   r   r%   )r   rN  ro  r  s       r   	_set_attrz#_patch_lang_core.<locals>._set_attr  s    %+L!+FT5M!B&IOPAZ2<<8!''a?PPv;#aU[[!122CD6JKKdF+	 Qs   /Cztt.divisiblity)ro  ztt.contiguityztt.constancy)NN) )r:   static_rangestatic_assertrZ  static_printr   to_irr   multiple_ofmax_contiguousmax_constancyrW  )langrd  rj  rl  rp  s        r   _patch_lang_corerz    sx    $?P'
 DJ"D+DD!DJJy/?@D!)/BD @Dr   c                    | j                   j                         D cg c]   \  }}|t        t        j                  fv s|" }}}t	        |      dk(  sJ d       t        |d   t               t        |d   j                  t               |d   t        k(  rt        |d   j                  t               t        |d   j                         t        |d          y c c}}w )Nr   z:triton.language must be visible from within jit'd functionr   )__globals__r  rh   r  r;   r  interpreter_builderr   r   r  rz  )r  r%  r$   ry  s       r   _patch_langr~    s    "$.."6"6"8ShaEb"'']<RESDSt9>WWW>47/047>>#67Aw"}tAw||%89tAw~~&T!W Ts
    CCc                 h   t        | t              rIt        j                  t        j
                  j                  j                  j                  t        j
                  j                  j                  j                  |                   }t        j                  }d| cxk  rdk  rn nt        j                  }nkd| cxk  rdk  rn nt        j                  }nLd| cxk  rdk  rn nt        j                  }n-d| cxk  rdk  rn nt        j                  }nt        d|        t!        t        j"                  | g|      |      }t        j$                  ||      S t'        | d	      rt        j                  t        j
                  j                  j                  j                  t        j
                  j                  j                  j                  |                   }t!        t        j"                  | j)                         gt        j                        |      }t        j$                  ||      S | S )
Ni   l        l        l         l            l            zUnsupported integer value r5   data_ptr)rg   r\   rh   	str_to_tytritonruntimejitJITFunction_type_of_key_ofr7   rc   rb   rd   r?   r   r   r   r   r   r  )r  tyr   r   s       r   _implicit_cvtr    sx   #s\\&..,,88AA&..BTBTB`B`BhBhilBmnoS 5 HHEc!E!IIEs"U"HHEc!E!IIE9#?@@bhhuE:B?yy$$sJ\\&..,,88AA&..BTBTB`B`BhBhilBmnobhh'7ryyI2Nyy$$Jr   )	num_warps
num_stagesnum_ctasenable_fp_fusiongridmaxnregc                   $    e Zd Zd Zd Zd Zd Zy)GridExecutorc                 
   ddl m} || _        || _        || _        |j
                  j                         D ci c]  \  }}| ||       }}}|D cg c]  }|j                  |      dk(  s| c}| _        y c c}}w c c}w )Nr   )_normalize_tyro  )	r  r  r  	arg_namesr  rZ   r  get
constexprs)r   r  r  r  r  ro  r  rZ   s           r   r   zGridExecutor.__init__  sx    &"	CECUCUC[C[C]^xtR4r!22^^,5bD9L9LT9RVa9a4b _bs   A:B -B c                    g }|D ]?  }t        |d      r |j                  |j                                /|j                  |       A i }|j                         D ]*  \  }}t        |d      r|j                         ||<   &|||<   , ||fS Nr  )r   r  cpur  )r   args_devr  args_hstr  
kwargs_hstr#   r$   s           r   _init_args_hstzGridExecutor._init_args_hst  s     	%CsJ'	*$		% 
 ,,. 	(JCuj)"'))+
3"'
3		(
 ##r   c                    t        ||      D ]P  \  }}t        |d      s|j                  j                  |j	                  |j
                        j                         R |j                         D ]U  \  }}||   }	t        |d      s|j                  j                  |	j	                  |j
                        j                         W y r  )zipr   r   copy_todevicer  )
r   r  r  r  r  arg_devarg_hstr#   	kwarg_dev	kwarg_hsts
             r   _restore_args_devzGridExecutor._restore_args_dev$  s     #Hh 7 	DGWw
+""7::gnn#=#B#BC	D
 %lln 	JNC"3Iy*-$$Y\\)2B2B%C%H%HI	Jr   c                    |j                         D ci c]  \  }}|t        vs|| }}}|j                  dd      ry | j                  ||      \  }}t	        | j
                         t        j                  | j
                  g|i |}|j                         D 	ci c]!  \  }}	||| j                  v r|	n
t        |	      # }}}	t        | j                        r| j                  |      n| j                  }
t        |
      dk  sJ d       |
ddt        |
      z
  z  z   }
t        j                  |
  	 t        |
d         D ]Q  }t        |
d         D ]>  }t        |
d         D ]+  }t        j!                  |||        | j
                  d	i | - @ S 	 | j)                  ||||       y c c}}w c c}	}w # t"        $ r}t%        t'        |            |d }~ww xY w)
NwarmupF   z#grid must have at most 3 dimensions)r   r   r   r   r)   )r  RESERVED_KWSpopr  r~  r  r  getcallargsr  r  callabler  r;   r}  r   r:   r   	Exceptionr   r  r  )r   r  r  r  r  r  r  r  ro  r  r  r   r   r   es                  r   __call__zGridExecutor.__call__/  s   #)<<>K41aQl5J!Q$KK::h&#228VD*DGG ""477DXDD^b^h^h^jkQZQUWZTT__4c-:LLkk"*499"5tyy4994yA~DDD~eq3t9}--(($/	347^ (tAw (A"47^ (+88AqA$((( 	x6:F3 L l  	3"47+2	3s)   F/F/ &F57A"F; ;	GGGN)r&   r'   r(   r   r  r  r  r)   r   r   r  r  	  s    c$ 	JGr   r  c                   0    e Zd ZddZed        Z d Zd Zy)InterpretedFunctionNc                      | _          fd}| _        t        j                  |      }|j                  j                         D cg c]  }|j                   c} _        y c c}w )Nc                  \    |d   } t        j                  j                  |      | i |S )Nr  r  r  r  )r  r  r  r   s      r   runz)InterpretedFunction.__init__.<locals>.runR  s/    &>D><>OOOr   )r  r  r  	signature
parametersrN  ro  r  )r   r  r  r  r  s   `    r   r   zInterpretedFunction.__init__O  sQ    	P %%b)	*3*>*>*E*E*GHQ!&&HHs   A"c                 .    | j                   j                  S r   )r  r&   r   s    r   r&   zInterpretedFunction.__name__Z  s    wwr   c                 D    t        | j                  | j                  |      S r   r  )r   r  s     r   __getitem__zInterpretedFunction.__getitem__^  s    DGGT^^T::r   c                     t        | j                         	  | j                  |i |S # t        $ r}t        t	        |            |d }~ww xY wr   )r~  r  r  r   r  )r   r  r  r  s       r   r  zInterpretedFunction.__call__a  sJ    DGG	3477D+F++ 	3"47+2	3s   ) 	AAArs  )r&   r'   r(   r   r  r  r  r)   r   r   r  r  M  s&    	I    ;3r   r  )4r  typingr   r   numpyr7   r  triton.languagelanguagerh   dataclassesr   errorsr   	functoolsr   _C.libtritonr	   r1  r
   r   r   r+   rM   re   rw   r   r   r   	vectorizerl   r  rm   r  r?   r  r   r   r  r  r  r  r  r=  rW  rz  r~  r  r}  r  r  r  r)   r   r   <module>r     sR         ! $  6 $ : 6 $+ + +	@='@
# bll45bll45Z<g gH8 H8V#4($@ @D].& ].@;6$ ;6|) K\. )*  ^AG AGH3 3r   