
    wg                      d dl mZ d dlmZmZmZmZmZ ddlm	Z	 ddl
mZ ddl
mZ  ed      Z G d	 d
e      ZdgdZdgdZdhdZdidZdjdZ	 	 	 dk	 dldZdmdZdmdZdmdZdmdZdmdZdndZdmdZdodZdodZdpdZ 	 	 	 	 dqdZ!dmdZ"dmdZ#dmdZ$dmdZ%dmd Z&drd!Z'dmd"Z(dmd#Z)dmd$Z*dsd%Z+dtd&Z,dud'Z-dvd(Z.dmd)Z/dmd*Z0dmd+Z1dmd,Z2dmd-Z3dmd.Z4dwd/Z5dxd0Z6dyd1Z7dzd2Z8d{d3Z9d|d4Z:d}d5Z;d~d6Z<dd7Z=dd8Z>dd9Z?dd:Z@dd;ZA	 d	 	 	 dd=ZBd> ZCd? ZDd@ ZEdA ZFdB ZGdC ZHdD ZIdE ZJdF ZK	 	 	 	 	 	 	 	 	 	 	 	 ddGZL	 	 	 	 ddHZMddIZNdJ ZOdK ZP	 	 	 	 	 	 ddLZQddMZR	 	 	 	 ddNZSddOZTddPZUddQZVddRZWddSZXddTZY	 	 	 	 ddUZZdV Z[	 	 	 	 	 	 ddWZ\ddXZ]dY Z^ddZZ_	 	 	 	 dd[Z`dd\Zadd]Zbdd^Zcdd_Zddd`ZeddaZfddbZgdc ZhdddZiddeZjddfZky<)    )annotations)ListOptionalSequenceTupleTypeVar   )ir   )core)mathTc                       e Zd Z fdZ xZS )IncompatibleTypeErrorImplc                    || _         || _        d| j                   j                         z   dz   | j                  j                         z   | _        t        t
        |   | j                         y )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__s      ]/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/triton/language/semantic.pyr   z"IncompatibleTypeErrorImpl.__init__   sX    2T[[5I5I5KKgUX\XcXcXlXlXnn'7E    )__name__
__module____qualname__r   __classcell__)r   s   @r   r   r      s    F Fr   r   c                    | dvrt        d|        t        j                  |j                  |       t        j                        S )Nr   r   r	   z+program_id axis must be 0, 1, or 2 but got )
ValueErrortltensorcreate_get_program_idint32axisbuilders     r   
program_idr+      s=    9FtfMNN99W2248"((CCr   c                    | dvrt        d|        t        j                  |j                  |       t        j                        S )Nr"   z-num_programs axis must be 0, 1, or 2 but got )r#   r$   r%   create_get_num_programsr'   r(   s     r   num_programsr.       s=    9HOPP99W44T:BHHEEr   c                `   | j                   }|j                   }| j                  }|j                  }||k(  r	||kD  r| S |S |t        j                  j                  j
                  k(  r	||k\  r| S |S |t        j                  j                  j
                  k(  r	||k\  r|S | S t        d| d|       )Nzunexpected signedness r   )int_bitwidthint_signednessr$   dtype
SIGNEDNESSUNSIGNED	TypeError)a_tyb_tya_rankb_ranka_snb_sns         r   integer_promote_implr<   +   s    FFDD t|t0D0	$$--	-'t1T1	$$--	-'t1T1
,TF%v>
??r   c                @   | j                         s|j                         rt        j                  S | j                         s|j                         rt        j                  S | j                         s|j                         r"|rt        j                  S t        j                  S | j                         s|j                         rR|rt        j                  S | j                         r |j                         rt        j                  S t        j                  S | j                         r|j                         st        d|  d|       |rL| j                  |j                  k7  r3t        d| j                         z   dz   |j                         z   dz         t        | |      S )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)is_fp64r$   float64is_fp32float32is_fp16float16is_bf16bfloat16is_intr5   r1   r   r<   )r6   r7   
div_or_mods      r   computation_type_implrJ   ;   s7    ||~zz ||~zz ||~::::||~::<<>dlln;;zz;;=*4&dV<== d))T-@-@@5G'QTXTaTaTcckk l 	l  d++r   c                    | j                         rL|st        | |      |j                         r| |k7  rt        | |      |j                         rt        | |      y y N)is_ptrr   is_floating)r   r   allow_ptr_as      r   check_ptr_type_implrP   c   s[    }}+FF;;==?& 0+FF;;+FF;;   r   c                H   t        | ||      \  } }| j                  j                  }|j                  j                  }t        |||       t        |||       |rG|j	                         s7|j	                         s't        |||      }	t        | |	|      } t        ||	|      }| |fS rL   )broadcast_impl_valuetypescalarrP   rM   rJ   cast)
lhsrhsr*   allow_lhs_ptrallow_rhs_ptrarithmetic_checkrI   
lhs_sca_ty
rhs_sca_ty
ret_sca_tys
             r   binary_op_type_checking_implr^   o   s     $Cg6HCJJ
J>
J>
 1 1 3J<M<M<O*:z:N
3
G,3
G,8Or   c                   t        | ||dd      \  } }| j                  j                  }|j                  j                  }|j                         r|j                         rt	        d      |j                         r@|j                         s0|| }} | j                  j                  }|j                  j                  }|j                         rDt        j                  |j                  | j                  |j                        | j                        S |j                         rDt        j                  |j                  | j                  |j                        | j                        S |j                         rDt        j                  |j                  | j                  |j                        | j                        S t	        d|       )NTzcannot add pointers togetherr>   )r^   rS   rT   rM   r5   r$   r%   create_addptrhandlerN   create_faddrH   
create_addinputotherr*   input_scalar_tyother_scalar_tys        r   addri      sO   /ugtTRLE5jj''Ojj''OO$:$:$<677 (>(>(@eu**++**++yy..u||U\\JEJJWW		$	$	&yy,,U\\5<<H%**UU				!yy++ELL%,,GTT
&&78
99r   c           	     z   t        | ||dd      \  } }| j                  j                  }|j                         rNt	        j
                  |j                  | j                  t        ||      j                        | j                        S |j                         rDt	        j
                  |j                  | j                  |j                        | j                        S |j                         rDt	        j
                  |j                  | j                  |j                        | j                        S t        d|       )NTFr>   )r^   rS   rT   rM   r$   r%   r`   ra   minusrN   create_fsubrH   
create_subr5   re   rf   r*   	scalar_tys       r   subrp      s    /ugtUSLE5

!!Iyy..u||U5'=R=Y=YZ\a\f\fggyy,,U\\5<<H%**UU				yy++ELL%,,GTT
&yk2
33r   c                   t        | ||      \  } }| j                  j                  }|j                         rDt	        j
                  |j                  | j                  |j                        | j                        S |j                         rDt	        j
                  |j                  | j                  |j                        | j                        S t        d|       Nr>   )r^   rS   rT   rN   r$   r%   create_fmulra   rH   
create_mulr5   rn   s       r   mulru      s    /ugFLE5

!!Iyy,,U\\5<<H%**UU				yy++ELL%,,GTT
&yk2
33r   c           	     6   t        | ||dddd      \  } }| j                  j                  }|j                  j                  }|j                         r|j	                         rt        |||      }n|j	                         r|j                         rt        | ||      } n|j	                         rG|j	                         r7t        | t        j                  |      } t        |t        j                  |      }nc|j                         rE|j                         r5|j                  |j                  kD  rt        |||      }nt        | ||      } nt        d|       t        j                  |j                  | j                  |j                        | j                        S NFTr>   )r^   rS   rT   rN   rH   rU   r$   rC   fp_mantissa_widthr5   r%   create_fdivra   rd   s        r   truedivrz      sH   /ugueUY[_`LE5jj''Ojj''O""$)?)?)AUOW5				!o&A&A&CUOW5				!o&<&<&>UBJJ0UBJJ0		$	$	&?+F+F+H,,/P/PP9E9E *?*;<==99W((u||DejjQQr   c           	     Z   t        | ||dddd      \  } }| j                  j                  }|j                  j                  }|j                         r|j                         rt	        ||      }t        | ||      } t        |||      }|j                         rDt        j                  |j                  | j                  |j                        | j                        S t        j                  |j                  | j                  |j                        | j                        S t        d|       rw   )r^   rS   rT   rH   r<   rU   is_int_signedr$   r%   create_sdivra   create_udivr5   )re   rf   r*   rg   rh   ret_tys         r   floordivr      s    /ugueUY[_`LE5jj''Ojj''OO$:$:$<%oGUFG,UFG,!99W00u||LejjYY99W00u||LejjYY
&&78
99r   c           	     d   | j                   j                  }|j                   j                  }|j                         r|j                         st        d      t	        | ||dddd      \  } }|j                  | j                  |j                        }t        j                  || j                         S )Nz4both operands of fdiv must have floating scalar typeFT)	rS   rT   rN   r5   r^   ry   ra   r$   r%   )re   rf   ieee_roundingr*   rg   rh   rets          r   fdivr      s    jj''Ojj''O&&(0K0K0MNOO/ugueUZ\`aLE5


ellELL
9C99S%**%%r   c                   t        | ||dddd      \  } }| j                  j                  }|j                  j                  }|j                         r;t	        | t        t        j                  t        | |d|      |      ||      |      }|S |j                         r|j                  |j                  k7  r3t        d|j                         z   dz   |j                         z   dz         |j                         rDt        j                  |j!                  | j"                  |j"                        | j                        S t        j                  |j%                  | j"                  |j"                        | j                        S t        d|       )NFT_builderzCannot mod z by r?   r>   )r^   rS   rT   rN   rp   ru   r   floorr   rH   r1   r5   r   r|   r$   r%   create_sremra   create_urem)re   rf   r*   ro   rh   r   s         r   modr      sR   /ugueUY[_`LE5

!!Ijj''O%TZZUE5'(JU\]_dfmnpwx
				##'E'EEMI,>,>,@@6IOLdLdLff jo o p p ""$99W00u||LejjYY99W00u||LejjYY
&yk2
33r   c                f   t        | ||      \  } }| j                  }|j                         r|t        j                  j
                  k(  rDt        j                  |j                  | j                  |j                        | j                        S |t        j                  j                  k(  rDt        j                  |j                  | j                  |j                        | j                        S t        d|       |j                         rDt        j                  |j                  | j                  |j                        | j                        S |j                         rDt        j                  |j!                  | j                  |j                        | j                        S t#        d|       NzUnexpected propagate_nan Unexpected dtype )r^   r2   rN   r$   PropagateNanALLr%   create_minimumfra   rS   NONEcreate_minnumfr#   r|   create_minsiis_int_unsignedcreate_minuir5   xypropagate_nanr*   r2   s        r   minimumr     3   '1g6DAqGGEBOO///99W44QXXqxxH!&&QQboo22299W33AHHahhGPP8HII				yy--ahhA166JJ				 yy--ahhA166JJ+E7344r   c                f   t        | ||      \  } }| j                  }|j                         r|t        j                  j
                  k(  rDt        j                  |j                  | j                  |j                        | j                        S |t        j                  j                  k(  rDt        j                  |j                  | j                  |j                        | j                        S t        d|       |j                         rDt        j                  |j                  | j                  |j                        | j                        S |j                         rDt        j                  |j!                  | j                  |j                        | j                        S t#        d|       r   )r^   r2   rN   r$   r   r   r%   create_maximumfra   rS   r   create_maxnumfr#   r|   create_maxsir   create_maxuir5   r   s        r   maximumr     r   r   c                X   t        |||      \  }}t        | ||      \  } }t        | ||      \  } }| j                  }|j                         rPt        j                  |j                  | j                  |j                  |j                  |      | j                        S t        d| d      )Nr   z(. Only floating point clamp is supported)	r^   r2   rN   r$   r%   create_clampfra   rS   r5   )r   minmaxr   r*   r2   s         r   clampr   %  s    +Cg>HC)!S':FAs)!S':FAsGGEyy..qxxSZZQ^_abagaghh+E72Z[\\r   c                @   t        | ||ddd      \  } }| j                  j                  }|j                  j                  }|j                         r|j                         st	        ||      t        ||      }||k7  rt        | ||      } ||k7  rt        |||      }| |fS )NF)r^   rS   rT   rH   r   r<   rU   )re   rf   r*   input_sca_tyother_sca_tyr]   s         r   bitwise_op_type_checking_implr   6  s    /ugueUZ[LE5::$$L::$$L (;(;(='lCC%lLAJ\!UJ0\!UJ0%<r   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rL   )r   r$   r%   
create_andra   rS   re   rf   r*   s      r   and_r   E  >    0wGLE599W''ellCUZZPPr   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rL   )r   r$   r%   	create_orra   rS   r   s      r   or_r   J  s>    0wGLE599W&&u||U\\BEJJOOr   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rL   )r   r$   r%   
create_xorra   rS   r   s      r   xor_r   O  r   r   c                   | j                   j                         s t        | t        j                  d      |      } |j                   j                         s t        |t        j                  d      |      }t        | ||      S Nint1)rS   is_int1bitcastr$   r2   r   r   s      r   logical_andr   T  s_    ::rxx/9::rxx/9ug&&r   c                   | j                   j                         s t        | t        j                  d      |      } |j                   j                         s t        |t        j                  d      |      }t        | ||      S r   )rS   r   r   r$   r2   r   r   s      r   
logical_orr   \  s_    ::rxx/9::rxx/9ueW%%r   c                    | j                   j                         s t        | t        j                  d      |      } t        | |      S r   )rS   r   r   r$   r2   invert)re   r*   s     r   not_r   d  s6    ::rxx/9%!!r   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rL   )r   r$   r%   create_lshrra   rS   r   s      r   lshrr   j  >    0wGLE599W((u||DejjQQr   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rL   )r   r$   r%   create_ashrra   rS   r   s      r   ashrr   o  r   r   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rL   )r   r$   r%   
create_shlra   rS   r   s      r   shlr   t  r   r   c                    | S rL    )re   s    r   plusr   ~  s    Lr   c                   | j                   j                  }|j                         rt        d|j	                         z   dz         t        j                  |j                  |j                  |            |      }t        || |      S )Nz$wrong type argument to unary minus ())
rS   rT   rM   r#   r   r$   r%   get_null_valueto_irrp   )re   r*   r   _0s       r   rk   rk     sr    ::$$L?,BWBWBYY\__``	7)),*<*<W*EF	UBr5'""r   c                .   | j                   j                  }|j                         s|j                         rt	        d|j                         z   dz         t        j                  |j                  |j                  |            |      }t        | ||      S )Nz%wrong type argument to unary invert (r   )rS   rT   rM   rN   r#   r   r$   r%   get_all_ones_valuer   r   )re   r*   r   _1s       r   r   r     s}    ::$$L 8 8 :@<CXCXCZZ]``aa	7--l.@.@.IJL	YBr7##r   c                    | j                   j                         st        j                  S | j                   j                  }t        j
                  t        j                  |      S rL   )rS   is_blockr$   r   shape
block_type)vr   s     r   
_bool_liker     s;    66??wwFFLLE==%((r   c                \   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         r|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t	        j
                  |j                  | j                  |j                        t        |             S t        d|       rr   )r^   rS   rT   rN   r$   r%   create_fcmpOGTra   r   rH   r|   create_icmpSGTcreate_icmpUGTr5   rn   s       r   greater_thanr         /ugFLE5

!!Iyy//ellKZX]M^__				""$99W33ELL%,,OQ[\aQbcc99W33ELL%,,OQ[\aQbcc
&yk2
33r   c                \   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         r|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t	        j
                  |j                  | j                  |j                        t        |             S t        d|       rr   )r^   rS   rT   rN   r$   r%   create_fcmpOGEra   r   rH   r|   create_icmpSGEcreate_icmpUGEr5   rn   s       r   greater_equalr     r   r   c                \   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         r|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t	        j
                  |j                  | j                  |j                        t        |             S t        d|       rr   )r^   rS   rT   rN   r$   r%   create_fcmpOLTra   r   rH   r|   create_icmpSLTcreate_icmpULTr5   rn   s       r   	less_thanr     r   r   c                \   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         r|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t	        j
                  |j                  | j                  |j                        t        |             S t        d|       rr   )r^   rS   rT   rN   r$   r%   create_fcmpOLEra   r   rH   r|   create_icmpSLEcreate_icmpULEr5   rn   s       r   
less_equalr     r   r   c                   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t        d|       rr   )r^   rS   rT   rN   r$   r%   create_fcmpOEQra   r   rH   create_icmpEQr5   rn   s       r   equalr         /ugFLE5

!!Iyy//ellKZX]M^__				yy..u||U\\JJW\L]^^
&yk2
33r   c                   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t        d|       rr   )r^   rS   rT   rN   r$   r%   create_fcmpUNEra   r   rH   create_icmpNEr5   rn   s       r   	not_equalr     r   r   c                   t        | t              rt        |t              st        d      t        | dz	        }t        |dz	        }|s|rt        d      || k  rt        d      || z
  }||dz
  z  dk7  rt        d      |g}t	        j
                  t        j                  |      }t	        j                  |j                  | |      |      S )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr   r   z#arange's range must be a power of 2)	
isinstanceintr#   boolr$   r   r'   r%   create_make_range)startendr*   is_start_int64is_end_int64ranger   r   s           r   aranger    s    eS!C)=JKK%2+&Nr	?L344
e|XYY%KE!>??GE]]288U+F99W..uc:FCCr   c                   t        |t        j                        r.|j                  j                  dk(  sJ d       t        |||      }nj|t        d      |dk(  r!|j                  |j                  |            }n!t        |d|j                         } ||      }t        j                  ||      }t        || |      S )Nr   zonly accepts size-1 tensorz2dtype must be specified when value is not a tensorr   get_)r   r$   r%   numelvaluerU   r#   r   r   getattrnamesplat)r   r
  r2   r*   get_value_fns        r   fullr    s    %#{{  A%C'CC%UE7+ =QRRA:**5;;w+?@E"7d5::,,?@L 'E		%'w''r   c                   | j                   j                         rJ d       t        |      dk(  r| S t        j                  | j
                  |      }t        j                  |j                  | j                  |      |      S )NzCannot splat a block tensorr   )	rS   r   lenr$   r   r2   r%   create_splatra   )r
  r   r*   r   s       r   r  r    sd    zz""$C&CC$
5zQ]]5;;.F99W))%,,>GGr   c                   d}|D ]  }||z  }	 | j                   j                  |k7  rt        d      t        j                  | j                   j
                  |      }t        j                  |j                  | j                  ||      |      S )Nr   z:reshape() cannot change total number of elements in tensor)	rS   r	  r#   r$   r   rT   r%   create_reshapera   )re   	dst_shapecan_reorderr*   r	  sr   s          r   reshaper  %  s|    E 
zz5 UVV]]5::,,i8F99W++ELL)[QSYZZr   c                   | j                   D cg c]  }t        j                  |       }}|j                  |d       | j                  j                         st        | ||      S t        j                  | j                  j                  |      }t        j                  |j                  | j                  |      |      S c c}w )Nr   )r   r*   )r   r$   _constexpr_to_valueinsertrS   r   r  r   rT   r%   create_expand_dimsra   )re   r)   r*   r   r  r   s         r   expand_dimsr  /  s    49KK@q''*@I@T1:: U)W==]]5::,,i8F99W//dCVLL As   Cc                L   |sJ d       t        | j                        dk(  sJ t        j                  | j                  j
                  | j                  d   |j                  d   z   g      }t        j                  |j                  | j                  |j                        |      S )Nz;current implementation of `cat` always may reorder elementsr   r   )	r  r   r$   r   rS   rT   r%   
create_catra   )rV   rW   r  r*   ret_types        r   catr!  :  s|    UUU;syy>Q}}SXX__syy|ciil/J.KLH99W''

CJJ?JJr   c                   t        | ||      \  } }| j                  g k(  }|rt        | d|      } t        |d|      }t        | j                  d   t        j
                        rt	        j
                  d      }nd}| j                  |gz   }t	        j                  | j                  j                  |      }t	        j                  |j                  | j                  |j                        |      }|rt        |dgd|      }|S )Nr   r	   Fr  r*   )rR   r   r  r   r$   	constexprr   rS   rT   r%   create_joinra   r  )abr*   
was_rank_1two	new_shaper   r   s           r   joinr,  A  s    1g.DAq BJ1g&1g&!''"+r||,ll1o3%I}}QVV]]I6H
))G''!((;X
FCcA3E7CJr   c                   t        | j                        dkD  sJ t        j                  | j                  d         dk(  sJ | j                  d d }t        j                  | j
                  j                  |      }|j                  | j                        \  }}t        j                  ||      t        j                  ||      fS )Nr   r#  r	   )
r  r   r$   r  r   rS   rT   create_splitra   r%   )r'  r*   r+  r   outLHSoutRHSs         r   splitr1  Z  s    L1""1772;/1454I}}QVV]]I6H))!((3NFF
		&(#
		&(# r   c                   t        | j                        t        |      k7  rt        d      t        d |D              t	        t        t        |                  k7  rt        d|       t        j                  | j                  j                  |D cg c]  }| j                  |    c}      }t        j                  |j                  | j                  |      |      S c c}w )Nz5permute dims must have the same length as input shapec              3  F   K   | ]  }t        j                  |        y wrL   )r$   r  ).0ds     r   	<genexpr>zpermute.<locals>.<genexpr>j  s     6Ab$$Q'6s   !z?permute dims must be a permutation of 0, 1, ..., n-1, but were )r  r   r#   sortedlistr  r$   r   rS   rT   r%   create_transra   )re   dimsr*   r5  r   s        r   permuter;  g  s    
5;;3t9$PQQ666$uSY?O:PPZ[_Z`abb}}UZZ..0NAQ0NOH99W))%,,=xHH 1Os   C 
c                   | j                   j                         sPt        j                  | j                   |      }t        j                  |j                  | j                  |      |      S | j                   j                         }t        |      t        |      k7  rt        d| d|       ||k(  r| S t        |      D ]0  \  }}||   |k7  s|dk7  st        d||    d| d| d| d| 
       t        j                  | j                   j                  |      }t        j                  |j                  | j                  |      |      S )Nz!Cannot broadcast, rank mismatch: , r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )rS   r   r$   r   r%   r  ra   get_block_shapesr  r#   	enumeraterT   create_broadcast)re   r   r*   r   	src_shapeiitems          r   broadcast_impl_shaperE  q  s;   :: uzz51yy--ellEBFKK

++-I
9~U#<YKr%QRR	Y' <48t	RSXYZS[R\ ]??Cf E!!"2i[5'; < <<
 ]]5::,,e4F99W--ellEBFKKr   c           	        | j                   }|j                   }|j                         r||j                         slt        j                  |j                  |j
                        }t        j                  |j                  |j                  |j                               |      }| |fS |j                         s||j                         rlt        j                  |j                  |j
                        }t        j                  |j                  | j                  |j                               |      } | |fS |j                         r|j                         r|j                         }|j                         }t        |      t        |      k  rt        t        |      t        |            D ]p  }t        j                  |j                  | j                  d      t        j                  |j                  dg|z               } | j                   }|j                         }r nt        |      t        |      k  rt        t        |      t        |            D ]p  }t        j                  |j                  |j                  d      t        j                  |j                  dg|z               }|j                   }|j                         }r t        |      t        |      k(  sJ g }t        |      D ]q  \  }	}
||	   }|
dk(  r|j                  |       "|dk(  s||
k(  r|j                  |
       >t        dt!        |	      z   dz   t!        |
      z   dz   t!        |      z          ||k7  rPt        j                  |j                  |      }t        j                  |j#                  | j                  |      |      } ||k7  rPt        j                  |j                  |      }t        j                  |j#                  |j                  |      |      }| |fS )Nr   r   z?Cannot make_shape_compatible: incompatible dimensions at index r>  r   )rS   r   r$   r   rT   r   r%   r  ra   r?  r  r  r  r@  appendr#   strrA  )rV   rW   r*   lhs_tyrhs_ty	lhs_shape	rhs_shape_	ret_shaperC  leftrightr   s                r   rR   rR     sQ   XXFXXF !2v}}fll;ii,,SZZ9P9P9RSU[\V 8OS __6??#4v}}fll;ii,,SZZ9P9P9RSU[\N 8OK 
	v0++-	++-	y>C	N*3y>3y>: 6ii : :3::q I "fmmaS9_ MO"335		6
 ^c)n,3y>3y>: 6ii : :3::q I "fmmaS9_ MO"335		6
 9~Y///	 + 	aGAtaLEqy  '1*%4-  &  "-/21v"68<"=?B4y"IKR"SUXY^U_"` a a	a 	!]]6==)<F))G44SZZKVTC	!]]6==)<F))G44SZZKVTC8Or   c                    | y | dk(  rt         j                  j                  S | dk(  rt         j                  j                  S t	        d|  d      )NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r
   ROUNDING_MODERTNERTZr#   )rounding_modes    r   _str_to_rounding_moderX    sU    $$$###
.}o=mn
oor   c                F   | j                   }|j                         r8t        j                  |j                  | j                   j                               }||k(  r| S |j                  }|j                  }|j                         s|j                         rt        | ||      S |j                  }|j                  }||k7  r&t        dt        |      z   dz   t        |      z         t        j                  |j                  | j                  |j                  |            |      S )Nz!Cannot bitcast data-type of size z to data-type of size )rS   r   r$   r   rT   r?  rM   rU   primitive_bitwidthr#   rH  r%   create_bitcastra   r   )re   dst_tyr*   src_ty
src_sca_ty
dst_sca_tysrc_bitsdst_bitss           r   r   r     s    ZZFv}}ejj.I.I.KLJJj//1E67++,,H,,H8<s8}L P. .03H> ? 	?99W++ELL&,,w:OPRXYYr   Nc                   | j                   }t        |t        j                        r|j                  }t        |t        j                        r|j                  }|j                         r8t        j                  |j                  | j                   j                               }||k(  r| S |j                  }|j                  }t        |      }d}|j                         rf|j                         rV|j                  |j                  k  r=|t        j                  j                  }nH|t        j                  j                  k7  r+d}n(|&t        dt!        |      z   dz   t!        |      z         |j#                         s|j#                         r|j$                  j&                  sJ d       |j)                         s|j)                         r<|j*                  j-                  d      	 J d        |j*                  d   | |||      S |j/                         r|j                         s"|j                         r|j/                         s|r@t        j0                  |j3                  | j4                  |j7                  |      |      |      S |j9                         r|j;                         r |j=                         r6|j;                         s&t?        t?        | t        j@                  |      ||      S |j                         xr+ |j                         xr |j                  |j                  kD  }|r?t        j0                  |jC                  | j4                  |j7                  |            |      S |j                         xr+ |j                         xr |j                  |j                  k  }	|	r?t        j0                  |jE                  | j4                  |j7                  |            |      S |jG                         r|jG                         r|jH                  |jH                  k7  s|jJ                  |jJ                  k7  r|jM                         xr |jO                          }
|jO                         rW| jP                  j7                  |      }t        j0                  |jS                  |      | jP                        }tU        | ||      S t        j0                  |jW                  | j4                  |j7                  |      |
      |      S |jY                         r|jG                         r|jO                         rW| jP                  j7                  |      }t        j0                  |jS                  |      | jP                        }tU        | ||      S |jM                         r?t        j0                  |j[                  | j4                  |j7                  |            |      S t        j0                  |j]                  | j4                  |j7                  |            |      S |jG                         r|jY                         r|jO                         s|jM                         s?t        j0                  |j_                  | j4                  |j7                  |            |      S t        j0                  |ja                  | j4                  |j7                  |            |      S |jc                         r|jG                         r|jH                  }|d	k(  r?t        j0                  |je                  | j4                  |j7                  |            |      S |d
k(  rWtU        t?        | t        jf                  |      t        j0                  |ji                  d      t        jf                        |      S |jG                         rO|jc                         r?t        j0                  |jk                  | j4                  |j7                  |            |      S |jc                         rO|jc                         r?t        j0                  |jm                  | j4                  |j7                  |            |      S J d|  d|        )NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is z4fp8e4nv data type is not supported on CUDA arch < 89convert_custom_typesz0target doesn't provide conversion for this type.r   @   r   r   zcannot cast z to )7rS   r   r$   r%  r
  r   r   rT   r?  rX  rN   rZ  r
   rT  rU  r#   rH  
is_fp8e4nvoptionsallow_fp8e4nvis_fp8e4b15codegen_fnsgetis_fp8r%   create_fp_to_fpra   r   rD   rB   rF   rU   rC   create_fp_trunccreate_fp_extrH   r0   r1   r|   is_boolr2   r   r   create_int_castis_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fprM   create_ptr_to_intint64	get_int64create_int_to_ptrr[  )re   r\  r*   fp_downcast_roundingr]  r^  r_  use_custom_roundingtruncate_fpext_fpsign_extendtyr   bitwidths                 r   rU   rU     s   ZZF&",,'&5399v}}ejj.I.I.KLJJ 11EFJ$:$: %

'
'**G*G
G'@P@P@U@U)=!R%5%5%:%::RV<O+ 68;JHJefhklvhwx y y 	:#8#8#:,,d.dd, J$:$:$<""&&"$+/0 	d1c	d 0:w""#9:5&J^ipqq 	
 6 6 8 Z%6%6%8yy00v||G?TVjkmstt 	Z%7%7%9Z%7%7%9D

G4j'JJ
 ((* F F%%
(E(EE  yy00v||G?TUW]^^ ##% F F%%
(E(EE  yy..u||V\\'=RSU[\\ z002:#:#::j>W>W[e[t[t>t ..0M9K9K9M5M""7+B711"5u{{CBUB0099W44U\\6<<PWCXZefhnoo &&(Z->->-@""7+B711"5u{{CBUB00%%'99W44U\\6<<PWCXY[abb99W44U\\6<<PWCXY[abb z>>@z'?'?'A99W44U\\6<<PWCXY[abb99W44U\\6<<PWCXY[abb z002**r>99W66u||V\\RYEZ[]cddq=T%7;RYYwGXGXYZG[]_]e]e=fhopp z002yy225<<gAVWY_`` z002yy//fll7>STV\]]4LtF8445r   c                    t         j                  j                  }| rQ| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S t        d|  d      |S )Nz.ca.cgCache modifier  not supported)r
   CACHE_MODIFIERr   CACGr#   cache_modifiercaches     r   _str_to_load_cache_modifierr  Q  st    ""EU"%%((E
 L	 u$%%((E L ~.>nMNNLr   c                d   t         j                  j                  }| r| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S | dk(  rt         j                  j
                  }|S | dk(  rt         j                  j                  }|S t        d|  d      |S )Nz.wbr  z.csz.wtr  r  )r
   r  r   WBr  CSWTr#   r  s     r   _str_to_store_cache_modifierr  ]  s    ""EU"%%((E L u$%%((E L u$%%((E
 L	 u$%%((E L ~.>nMNNLr   c                    t         j                  j                  }| rQ| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S t        d|  d      |S )N
evict_lastevict_firstzEviction policy r  )r
   EVICTION_POLICYNORMAL
EVICT_LASTEVICT_FIRSTr#   )eviction_policyevictions     r   _str_to_eviction_policyr  m  su    !!((Hl*))44H
 O	 -))55H O //@OPPOr   c                    d }| rQ| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S t	        d|  d      |S )NzeronanzPadding option r  )r
   PADDING_OPTIONPAD_ZEROPAD_NANr#   )padding_optionpaddings     r   _str_to_padding_optionr  y  sh    GV#''00G
 N	 u$''//G N ~.>nMNNNr   c                d   t         j                  j                  }| r| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S | dk(  rt         j                  j
                  }|S t        d|  d      |S )Nacquirereleaseacq_relrelaxedMemory semantic r  )r
   MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr#   )
sem_optionsems     r   _str_to_semr    s    
//
)
)C"//))C J 9$//))C J 9$//11C
 J	 9$//))C J /
|>JKKJr   c                "   t         j                  j                  }| rr| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S t        d|  d      |S )Ngpuctasysr  r  )r
   MEM_SYNC_SCOPEGPUCTASYSTEMr#   )scope_optionscopes     r   _str_to_scoper    s    !!E5 %%))E L U"%%))E
 L	 U"%%,,E L /~^LMMLr   c                ~   | rt        | d      s| g} | D cg c]*  }t        |t        j                        r|j                  n|, } }| D ]+  }t        |t
              rd|cxk  rt        |      k  r(J  J  t        |       dkD  sJ t        |       t        t        |             k(  sJ d       t        |       S yc c}w )N__iter__r   z'Duplicate dimension in `boundary_check`r   )	hasattrr   r$   r%  r
  r   r  setr7  )boundary_checkblock_shapeelemdims       r   _canonicalize_boundary_checkr    s    ~z2,-N]klUY
4(F$**DPll! 	HCc3'A,Gs;7G,GGG,GGG	H>"Q&&&>"c#n*=&>>i@ii>n%% ms   /B:c	           
        ||t        d      | j                  j                  j                  }	|	t        j                  k7  sJ d       |	j                         r(|t        j                  j                  k(  rt        d      | j                  j                  }
t        ||
j                               }t        j                  |j                  | j                  |||||      |
      S )NK`mask` and `other` arguments cannot be specified for loading block pointers3`tl.int1` should be rewrited in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r#   rS   
element_tyr$   r   rH   r
   r  r  r  r?  r%   create_tensor_pointer_loadra   )ptrmaskrf   r  r  r  r  is_volatiler*   elt_tyr\  s              r   _load_block_pointerr    s     5,fggXX  ++FRWWSSS}}7b&7&7&?&??[\\ XX  F 2.&BYBYB[\N 99**3::~wPUW_almouw wr   c	           
        | j                   j                  j                         s't        d| j                   j	                          d      ||t        d      |s|rt        d      | j                   j                         sN|r%|j                   j                         rt        d      |r%|j                   j                         rt        d      | j                   j                         rN|%t        || j                   j                         |      }|%t        || j                   j                         |      }| j                   j                  }	|	j                  }
|
t        j                  k(  r=t        j                  }
t        j                  |
|	j                        }	t        | |	|      } |t        ||
|      }| j                   j                         r1| j                   j                         }t        j                  |
|      }n|
}|2t        j                   |j#                  | j$                  |||      |      S t        j                   |j'                  | j$                  |j$                  |r|j$                  nd |||      |      S )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)rS   rT   rM   r#   r   r   rE  r?  r  r$   r   int8pointer_typeaddress_spacerU   r   r%   create_loadra   create_masked_load)r  r  rf   r  r  r  r  r  r*   ptr_tyr  r   r\  s                r   _load_legacyr    s$   88??!!#01B1B1D0E]STT |)DEE. T U 	U
 88DII&&(deeUZZ((*eff xx'chh.G.G.I7SD(0I0I0KWUE XX__FF )=)=>3( UFG, xx))+vu-  |yy,,SZZ+VX^__yy&&szz4;;PU[_afhp'245;= 	=r   c	                   t        |      }	t        |      }
t        |      }| j                  j	                         r7| j                  j
                  j                         rt        | |||||	|
||	      S t        | |||||	|
||	      S rL   )	r  r  r  rS   rM   r  r   r  r  )r  r  rf   r  r  r  r  r  r*   r  r  r  s               r   loadr     s     (7E&7H$^4G
xxSXX0099;"3e^WeU]_jlstt CunguhXcelmmr   c           	         t        ||d      }|j                  | j                  ||j                  |      t	        |      t        |            }t        j                  ||      S NFrequire_i64)_convert_to_ir_valuescreate_descriptor_loadra   r   r  r  r$   r%   )desc_ptroffsetsr  r  rS   r*   r   s          r   descriptor_loadr    sW    #GW%HG&&xGAT'B>'R'>'O	QA 99Qr   c                    t        ||d      }t        j                  |j                  | j                  |j                  |      t        j
                        S r  )r  r$   r%   create_descriptor_storera   void)r  r
  r  r*   s       r   descriptor_storer    s@    #GW%HG99W44X__ellT[\^`^e^effr   c           	        |t        d      | j                  j                  j                         }|j                  j	                         st        |||      }|j                  j	                         sJ d       ||j                  j                         k(  s&J d| d|j                  j                          d       | j                  j                  j                  |j                  j                  k(  s@J d| j                  j                  j                   d|j                  j                   d       | j                  j                  j                  }|t        j                  k7  sJ d       t        ||      }t        |||      }t        j                  |j                  | j                  |j                  |||      t        j                        S )	Nr  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(r  )r#   rS   r  r?  r   rE  r$   r   r  rU   r%   create_tensor_pointer_storera   r  )	r  valr  r  r  r  r*   r  r  s	            r   _store_block_pointerr    s    fgg ((%%668K88"3W=88O OO#((33   ]	k]"4SXX5N5N5P4QQ[\] 88))SXX-@-@@  qDWX[X`X`XkXkXvXvWw  xQ  RU  RZ  RZ  Re  Re  Qf  fp  Cq  q@XX  ++FRWWSSS 2.+NN sFG
$C 99W88SZZQ_afhpqWW r   c           	        | j                   j                  j                         s't        d| j                   j	                          d      |rt        d      | j                   j                         sL|j                   j                         rt        d      |r%|j                   j                         rt        d      | j                   j                         rLt        || j                   j                         |      }|%t        || j                   j                         |      }| j                   j                  }|j                  }|t        j                  k(  r=t        j                  }t        j                  ||j                        }t        | ||      } t        |||      }|sJt        j                  |j!                  | j"                  |j"                  ||      t        j$                        S |j                   j                  j'                         st        d      t        j                  |j)                  | j"                  |j"                  |j"                  ||      t        j$                        S )Nr  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr  z"Mask must have boolean scalar type)rS   rT   rM   r#   r   r   rE  r?  r  r$   r   r  r  r  rU   r%   create_storera   r  ro  create_masked_store)	r  r  r  r  r  r  r*   r  r  s	            r   _store_legacyr  ;  s   88??!!#01B1B1D0E^TUU  A B 	B
 8888effDII&&(dee xx"3(A(A(CWM'chh.G.G.I7SDXX__FF )=)=>3( sFG
$C yy--cjj#**eXVXZX_X_``99##%=>>99W00SZZV[]efhjhohoppr   c           	        t        |      }t        |      }| j                  j                         s$| j                  j                  j                         rt        d      | j                  j                         r5| j                  j                  j                         rt        | ||||||      S t        | ||||||      S )N"Cannot store to a constant pointer)r  r  rS   is_constrT   r#   rM   r  r   r  r  )	r  r  r  r  r  r  r*   r  r  s	            r   storer  g  s     )8E&7H
xxchhoo668=>>
xxSXX0099;#CdNE8U\]] S#t^UHgVVr   c           	     B   t        |      }t        |      }| j                  j                  j                  }|j
                  dvrt        d      t        j                  |j                  | j                  |j                  |j                  ||      |j                        S )N)   r   rd  z9atomic_cas only supports elements with width {16, 32, 64})r  r  rS   rT   r  rZ  r#   r$   r%   create_atomic_casra   )r  cmpr  r  r  r*   r  s          r   
atomic_casr  }  s|    
c
C% E++J$$L8TUU99W..szz3::szzSVX]^`c`h`hiir   c                   | j                   j                  j                         s&t        d| j                   j	                         z         | j                   j                         s$| j                   j                  j                         rt        d      | j                   j                  j                  }|t        j                  u r|dk7  rt        d|z   dz         |t        j                  t        j                  t        j                  t        j                  fv rt        d|z   dz   t        |      z         | j                   j                         rN|%t        || j                   j!                         |      }|%t        || j                   j!                         |      }t#        || j                   j                  j                  |      }|s|j%                  d      }t        j                  }| j                   j                         rf|j'                  || j                   j!                               }t        j(                  t        j                  | j                   j!                               }t        j*                  ||      }| ||fS )Nz)Pointer argument of store instruction is r  ri   atomic_z does not support fp16z does not support T)rS   rT   rM   r#   r   r  r  r$   rE   r   r  int16rG   rH  r   rE  r?  rU   get_int1r  r   r%   )r  r  r  opr*   r  mask_irmask_tys           r   atom_red_typechecking_implr    s   88??!!#DsxxGXGXGZZ[[
xxchh11::<=>>++JRZZB%KR*BBCCbggrww"++>>R*>>ZPQQ
xx'chh.G.G.I7SD?&sCHH,E,E,GQC
sCHHOO..
8C""4(''88**7CHH4M4M4OPGmmBGGSXX-F-F-HIGyy'*T>r   c                   t        | ||d|      \  } }}t        |      }t        |      }|j                  j                  }|j                         r|j                         rjt        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S |t        j                  t        j                   hvrt#        d|       t%        g d||      }|t        j                  k(  rt        j&                  nt        j(                  }t+        |||      }	t+        | t        j,                  |d      |      }
|t        j                  k(  rt        j.                  nt        j0                  }t+        |||      }t+        | t        j,                  |d      |      }t3        |||      }t5        |||      }t        j                  |j                  t        j                  j                  |
j                  |	j                  t7        |||      j                  ||      |	j                        }t        j                  |j                  t        j                  j8                  |j                  |j                  t7        |||      j                  ||      |j                        }t;        ||||      }t+        |||      S )Nr   z#atomic_max not supported for dtype         r   )r  r  r  rS   rT   rH   r|   r$   r%   create_atomic_rmwr
   	ATOMIC_OPMAXra   UMAXrC   rA   r5   r  r'   rw  r   r  uint32uint64r   r   r   UMINwherer  r  r  r  r  r*   sca_tyr  i_typei_vali_ptrui_typeui_valui_ptrposnegpos_retneg_retr   s                      r   
atomic_maxr       /S$wONCd
c
C% EXX__F}}!99))",,*:*:CJJ

TXT_T_adfklnqnvnvx x 99))",,*;*;SZZUYU`U`beglmorowowy y
 bjj"**--=fXFGGC)D2::-RXX288FC)EC3W=E!RZZ/biiRYYGS'7+FS"//'15w?F
T7
+C
Cw
'Cii!!",,"2"2ELL%,,"&tS'":"A"A3	OPUPZPZ\G ii!!",,"3"3V]]FMM"&tS'":"A"A3	OPVP[P[]G Wgw
/C3((r   c                   t        | ||d|      \  } }}t        |      }t        |      }|j                  j                  }|j                         r|j                         rjt        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S |t        j                  t        j                   hvrt#        d|       t%        g d||      }|t        j                  k(  rt        j&                  nt        j(                  }t+        |||      }	t+        | t        j,                  |d      |      }
|t        j                  k(  rt        j.                  nt        j0                  }t+        |||      }t+        | t        j,                  |d      |      }t3        |||      }t5        |||      }t        j                  |j                  t        j                  j                  |
j                  |	j                  t7        |||      j                  ||      |	j                        }t        j                  |j                  t        j                  j8                  |j                  |j                  t7        |||      j                  ||      |j                        }t;        ||||      }t+        |||      S )Nr   z#atomic_min not supported for dtype r  r   )r  r  r  rS   rT   rH   r|   r$   r%   r  r
   r  MINra   r
  rC   rA   r5   r  r'   rw  r   r  r  r	  r   r   r   r  r  r  s                      r   
atomic_minr    r  r   c           
        t        | ||d|      \  } }}t        |      }t        |      }|j                  j                  }|j                         rt        j                  j                  nt        j                  j                  }t        j                  |j                  || j                  |j                  |j                  ||      |j                        S )Nri   )r  r  r  rS   rT   rN   r
   r  FADDADDr$   r%   r  ra   )r  r  r  r  r  r*   r  r  s           r   
atomic_addr     s    /S$wONCd
c
C% EXX__F$002		8H8HB99W..r3::szz4;;X[]bcehememnnr   c           
     (   t        | ||d|      \  } }}t        |      }t        |      }t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S )Nand)r  r  r  r$   r%   r  r
   r  ANDra   rS   r  r  r  r  r  r*   s         r   
atomic_andr%    x    /S$wONCd
c
C% E99W..r||/?/?SZZY]YdYdfikpqXX r   c           
     (   t        | ||d|      \  } }}t        |      }t        |      }t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S )Nor)r  r  r  r$   r%   r  r
   r  ORra   rS   r$  s         r   	atomic_orr*     sv    /S$gNNCd
c
C% E99W..r||

CJJX\XcXcehjopXX r   c           
     (   t        | ||d|      \  } }}t        |      }t        |      }t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S )Nxor)r  r  r  r$   r%   r  r
   r  XORra   rS   r$  s         r   
atomic_xorr.    r&  r   c           
     (   t        | ||d|      \  } }}t        |      }t        |      }t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S )Nxchg)r  r  r  r$   r%   r  r
   r  XCHGra   rS   r$  s         r   atomic_xchgr2    sx    /S$PNCd
c
C% E99W..r||/@/@#**cjjZ^ZeZegjlqrXX r   c                    | j                         |j                  j                  v s!J d|j                  j                   d|         | j                         } | dk(  rd} t	        t
        j                  |       S )Nzinput_precision must be one of z. Got TF32X3TF32x3)lowerrf  allowed_dot_input_precisionsupperr  r
   INPUT_PRECISION)input_precisionr*   s     r   _str_to_dot_input_precisionr;    sx      "goo&R&RR p
)'//*V*V)WW]^m]nopR%++-O(""2%%77r   c           
     	   d }| j                   j                         r|j                   j                         sJ  || j                  |j                  |j                         | j                  j	                         s|j                  j	                         r6t        | t        j                  |      } t        |t        j                  |      }||j                  j                  }t        ||      }t        | j                        }t        |j                        }	||	cxk(  rdk(  s1n ||	cxk(  rdk(  s$n J d| j                   d|j                   d       | j                  d   j                  |j                  d   j                  k(  sVJ d	| j                   d
|j                   d| j                  d   j                   d|j                  d   j                   d	       | j                  d   j                  dk\  r8| j                  d   j                  dk\  r|j                  d   j                  dk\  s"J d| j                   d|j                   d       | j                   j                  j                         rs| j                   j                  t        j                  k(  sJ d       | j                  d   j                  dk\  sJ d       |j!                  d      }
t        j"                  }n|j%                         rt'        d      | j                   j                  j)                         s$| j                   j                  j%                         r"|j+                  d      }
t        j,                  }n4|j/                         r|j1                  d      n|j+                  d      }
|}| j                   j                  d   }|j                   j                  d   }|dk(  r| j                   j                  d   nd }t        j2                  ||r|||gn||g      }||j5                  |
|r|||gn||g      }n|j6                  }|j                   |k(  sJ |M| j                  j9                         r1|j                  j9                         r|j                  j:                  }nd}t        j<                  |j?                  | j6                  |j6                  |||      |      S )Nc                |   |j                   s\| j                         s|j                         rJ d       | j                         r|j                         ry | |k(  sJ d|  d| d       y | j                         s|j                         r@| |k(  sJ d|  d| d       | j	                         s| j                         sJ d|  d       y y | j                         s|j                         r-|j                  rg d	}nd
dg}d } || |d        |||d       y | j                         s:| j                         s*| j                         s| j                         s
J d|         |j                         s:|j                         s*|j                         s|j                         s
J d|        | |k(  sJ d|  d| d       y )Nz1Dot op does not support fp8e4nv on CUDA arch < 90zFirst input (z) and second input (z) must have the same dtype!z0Both operands must be same type. First operand (z) and second operand (r   z:Both operands must be either int8 or uint8. Operand type ()fp8e4nvfp8e5fp8e4b15r>  r?  c           	     z     t         fd|D              s&dj                  |      }t        d| d| d  d      y )Nc              3  F   K   | ]  } t        d |                yw)is_N)r  )r4  
dtype_namer2   s     r   r6  zLdot.<locals>.assert_dtypes_valid.<locals>._validate_dtype.<locals>.<genexpr>=  s%     d
AwuJ<.@ACds   !r=  zOnly supports z. z (r   )anyr,  AssertionError)r2   allowed_typesoperand_namesupported_typess   `   r   _validate_dtypez9dot.<locals>.assert_dtypes_valid.<locals>._validate_dtype<  sM    dVcdd*.))M*B,~o=NbQ]P^^`af`ggh-ijj er   zFirst operandzSecond operandzUnsupported dtype )rg  re  rk  rH   is_int8is_uint8allow_fp8e4b15rD   rF   rB   r   )	lhs_dtype	rhs_dtyperf  rG  rJ  s        r   assert_dtypes_validz dot.<locals>.assert_dtypes_valid*  s1   $$ ++-i6J6J 7 CBC !i&6&6&8	)  A]9+EYZcYdd  ,A  A)!Y%5%5%7 I-  P1abkal  mC  DM  CN  NO  0P  P- ((*i.@.@ / ]OPY{Z[\]  /*!!#y'7'7'9))$DM%.$8Mk
  	=/J	=:JK ((*i.?.?.AYEVEVEX\e\m\m ] 4'	{34  ((*i.?.?.AYEVEVEX\e\m\m ] 4'	{34  I-  EykI]^g]h  iD  0E  E-r   r	      z+Both inputs must be either 2D or 3D; (lhs: z	 vs rhs: r   r#  zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (r  z0All non-batch values in both first input shape (z) and second input shape (z) must be >= 16!zonly int8 supported!r   r   zsmall blocks not supported!r   zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`) rS   r   r2   rf  rh  rU   r$   rE   default_dot_input_precisionr;  r  r   r
  rT   rH   r  	get_int32r'   rF   r#   rB   get_fp32rC   rD   get_fp16r   r  ra   rk  max_num_imprecise_acc_defaultr%   
create_dot)rV   rW   accr:  max_num_imprecise_acc	out_dtyper*   rP  lhs_rankrhs_rankr   ret_scalar_tyMNBr   
acc_handles                    r   dotrc  '  s   E@ 88388#4#4#666		399goo>
yy#))"7"7"93

G,3

G,!//EE1/7KO399~H399~Hx$1$H(A(A  REpqtqzqzp{  |E  FI  FO  FO  EP  PQ  DR  RA99R=#))
#E q(3LSYYK  XU  VY  V_  V_  `b  Vc  Vi  Vi  Uj  jZ  [^  [d  [d  eg  [h  [n  [n  Zo  op  qq 99R="$2)<)<)BIIbM2%|
:399+E_`c`i`i_jjz{| & xxxx"'')A+AA)yy|!!R'F)FF'q!				vx 	x		 	 	"chhoo&=&=&?a 

$-$5$5$7Wa W=M=Ma=P!rArA%]qA]]=q1a)q!fEF
{))"1q!Qi1a&I
ZZ
xx6!!! $99#))"2"2"4$+OO$Q$Q!$%!99W''

CJJ
O]rs r   c                   t        | t        j                  |      } | j                  j	                         r0t        | ||      \  } }t        |||      \  }}t        | ||      \  } }t        |||dd      \  }}| j                  j	                         st        | ||      \  } }|j                  }t        j                  |j                  | j                  |j                  |j                        |      S )NT)
rU   r$   r   rS   r   rR   r^   r%   create_selectra   )	conditionr   r   r*   rM  r   s         r   r  r    s    Y1I~~ +Iq'B	1#Aq'21+Iq'B	1'1gtTBDAq>>""$+Iq'B	1VVF99W**9+;+;QXXqxxPRXYYr   c                d    |rt        j                  ||      }n|}t        j                  | |      S rL   )r$   r   r%   )r   ro   rN  res_tys       r   wrap_tensorri    s-    y)4 99Qr   c                   	
 |t        fd D               d} d   j                  j                  
t        
      }||k  sJ d| d       t	        
      D cg c]  \  }}||k7  s| c}}	t        
fd D              sJ d       j                   D cg c]  }|j                   c}|       |       j                          t         	fdt        t                     D              S c c}}w c c}w )Nc              3  f   K   | ](  }t        ||j                  j                  gd        * yw)Tr$  N)r  r	  r
  )r4  tr*   s     r   r6  zreduction.<locals>.<genexpr>  s*     fZ[wq177==/tWUUfs   .1r   z&reduction axis must be < inputs rank (r   c              3  P   K   | ]  }|j                   j                  k(    y wrL   )rS   r   )r4  rl  r   s     r   r6  zreduction.<locals>.<genexpr>  s     5qvv||u$5s   #&z-all reduction inputs must have the same shapec              3     K   | ]7  }t        j                  |      |   j                  j                         9 y wrL   ri  
get_resultrS   rT   )r4  rC  inputs	reduce_oprN  s     r   r6  zreduction.<locals>.<genexpr>  s4     t\]Y11!4fQinn6K6KYWt   =A )
tuplerS   r   r  r@  allcreate_reducera   verifyr  )rq  r)   region_builder_fnr*   rankrC  r  rl  rr  rN  r   s   `  `    @@@r   	reductionrz    s    |f_eff1INN  Eu:D$;H@aHH;(/=tq!19=I5f55f7ff5%%&@Aqxx&@$GIi tafgjkqgrasttt > 'As    C;.C; Dc                     d   j                   j                  t              }| |cxk  r|k  sn J d| d| d       |dk  r||z  } D ]"  }|j                   j                  k(  rJ d        |j                   D cg c]  }|j                   c}||       |       j                          t         fdt        t                     D              S c c}w )Nr   z
scan axis z must be < inputs rank (r   z(all scan inputs must have the same shapec              3     K   | ]7  }t        j                  |      |   j                  j                         9 y wrL   ro  )r4  rC  rq  scan_opr   s     r   r6  z#associative_scan.<locals>.<genexpr>  s4     nVWW//2F1INN4I4I5Qnrs  )rS   r   r  create_scanra   rw  rt  r  )	rq  r)   rx  reverser*   ry  rl  r}  r   s	   `      @@r   associative_scanr    s    1INN  Eu:D5D4S:dV3KD6QR!SSax Qvv||u$P&PP$Q !!V"<188"<dGLGgNNn[`adekal[mnnn	 #=s   C c                (   t        | j                        dk(  sJ d       | j                  j                         sJ d       t	        j
                  |j                  | j                  |      t	        j                  t        j                  |f            S )Nr   z histogram only supports 1D inputz%histogram only supports integer input)
r  r   r2   rH   r$   r%   create_histogramra   r   r'   )re   num_binsr*   s      r   	histogramr    ss    u{{q D"DD ;;H!HH99W--ellHEr}}UWU]U]`h_kGlmmr   c                   t        dt        | j                              t        |      k7  rt        d      | j                  j                  dt        j                  || j                  j                                      | S )Nr   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   r  r   r#   ra   set_attrr
   	make_attrget_contextr   valuess     r   multiple_ofr    s[    
1c!''ls6{*\]]HH'fahh>R>R>T)UVHr   c                    t        | j                        t        |      k7  rt        d      | j                  j	                  dt        j                  || j                  j                                      | S )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityr  r   r#   ra   r  r
   r  r  r  s     r   max_contiguousr    sS    
177|s6{"_``HHor||FAHH<P<P<R'STHr   c                    t        | j                        t        |      k7  rt        d      | j                  j	                  dt        j                  || j                  j                                      | S )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr  r  s     r   max_constancyr    sS    
177|s6{"^__HHnbll6188;O;O;Q&RSHr   c                f    t        j                  | j                         t         j                        S rL   )r$   r%   create_barrierr  )r*   s    r   debug_barrierr    s     99W++-rww77r   c                V   | j                  d      s|r| dz  } | j                  d      s
|r| d d dz   } t        |       dkD  r| j                  d      sd| z   } |D cg c]  }|j                   }}t	        j
                  |j                  | ||      t        j                        S c c}w )N r>  r#  r	   )endswithr  
startswithra   r$   r%   create_printr  )prefixargshexr*   argnew_argss         r   device_printr    s     ??3D#??4 Tt#
6{Qv005v&*+s

+H+99W))&#x@"''JJ ,s   B&c           	     \   | j                   }|j                         sPt        j                  |j                  d      }t        j
                  |j                  | j                  d      |      } t        j
                  |j                  | j                  ||||      t        j                        S )N)r   )
rS   r   r$   r   rT   r%   r  ra   create_assertr  )condmsg	file_name	func_namelinenor*   cond_tys          r   device_assertr    s|    iiG--6yy--dkk5A7K99W**4;;Y	SYZ\^\c\cddr   c                   t        |t              rt        j                  |      }t        |t        j                        r|rGd|j                  cxk  rdk  sn J d|j                   d       | j                  |j                        S d|j                  cxk  rdk  sn J d|j                   d       | j                  |j                        S t        |t        j                        r|j                  j                  dk(  sJ d	       |j                  j                         sJ d
       |j                  t        j                  k7  rE|rC| j                  |j                  | j                         |j                  j                               S |j                  t        j                   k7  r	|sJ d       |j                  S J dt#        |              )Nl         l            z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the range           zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetszzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )r   r   r$   r%  r
  rx  rT  r%   r	  r2   rH   rw  rp  ra   get_int64_tyr|   r'   rS   )r*   r  r  s      r   _convert_elem_to_ir_valuer    s   $||D!$%TZZ/%/ F 4#zzl*D2F F/$$TZZ00TZZ/%/ F 4#zzl*D2F F/$$TZZ00	D"))	$zz1$R&RR$zz  "^$^^"::!k**4;;8L8L8NPTPZPZPhPhPjkkZZ288#KS S S5{{TGT
|TT5r   c                v    t        |d      r|D cg c]  }t        | ||       c}S t        | ||      gS c c}w )Nr  )r  r  )r*   	list_liker  r  s       r   r  r  #  s?    y*%R[\$)'4E\\%gy+FGG ]s   6c           	        t        ||      }t        ||      }t        ||d      }| j                  j                         r$| j                  j                  j	                         rt        d      | j                  j                  t        j                  k(  rCt        | t        j                  t        j                  | j                  j                        |      } t        d      sgD cg c]*  }t        |t        j                        r|j                  n|, c}t!        d D              sJ d       t        |d      s|g}|D cg c]*  }t        |t        j                        r|j                  n|, }}t#        |      t%        t'        t)        |                  k(  sJ d       t!        fd||||fD              sJ d	       |j+                  | j,                  ||||      }t        j.                  |t        j                  t        j0                  | j                  j                                    S c c}w c c}w )
NFr  zMExpected `base` to be a pointer type (but not a block pointer type or others)r  c              3  `   K   | ]&  }t        |t              xr d |cxk  xr dk  nc  ( yw)r  r  N)r   r   )r4  r  s     r   r6  z!make_block_ptr.<locals>.<genexpr><  s)     XDz$$?4)?%)??Xs   ,.zGExpected a list of constant integers (`int32_t` range) in `block_shape`z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc              3  L   K   | ]  }t              t        |      k(    y wrL   )r  )r4  r  r  s     r   r6  z!make_block_ptr.<locals>.<genexpr>F  s     dis;3y>1ds   !$zBExpected shape/strides/offsets/block_shape to have the same length)r  rS   rM   r  r   r#   r$   r   rU   r  r  r  r  r   r%  r
  ru  r7  r8  r  r  create_make_block_ptrra   r%   r   )	baser   stridesr  r  orderr*   r  ra   s	       `    r   make_block_ptrr  )  s    "'51E#GW5G#GW%HG 99!5!5!>!>!@hii yyrww&D"//"''4993J3JKWU ;
+"mVabdD",,!?4::TIbKXKXX RQRX 5*%PUV:dBLL9TZZtCVEV%=Ds5z!233s5ss3 dE7T[]bCcdd MLMd **4;;wQ\^cdF99VR__R]]499;O;OQ\-]^__% c Ws   ./H=/Ic                    t        ||d      }t        j                  |j                  | j                  |      | j
                        S r  )r  r$   r%   create_advancera   rS   )r  r  r*   s      r   advancer  P  s8    #GW%HG 99W++DKKA499MMr   )r)   r   r*   
ir.builderreturn	tl.tensor)r6   tl.dtyper7   r  r  r  )r6   r  r7   r  rI   r   r  r  )r   r  r   r  rO   r   r  None)FFTF)rV   r  rW   r  r*   r  r  Tuple[tl.tensor, tl.tensor])re   r  rf   r  r*   r  r  r  )
re   r  rf   r  r   r   r*   r  r  r  )r   r  r   r  r   tl.PropagateNanr*   r  )
r   r  r   r  r   r  r   r  r*   r  )re   r  rf   r  r*   r  r  r  )re   r  r*   r  )re   r  r  r  )re   r  r*   r  r  r  )re   r  r*   r  r  r  )r   r  r  ztl.block_type)r  r   r  r   r*   r  r  r  )r   	List[int]r2   r  r*   r  r  r  )r
  r  r   r  r*   r  r  r  )
re   r  r  r  r  r   r*   r  r  r  )re   r  r)   r   r*   r  r  r  )
rV   r  rW   r  r  r   r*   r  r  r  )r'  r  r(  r  r*   r  r  r  )r'  r  r*   r  r  r  )re   r  r:  z
Tuple[int]r*   r  r  r  )re   r  r   r  r*   r  r  r  )rV   r  rW   r  r*   r  r  r  )rW  Optional[str])re   r  r\  r  r*   r  r  r  rL   )
re   r  r\  r  r*   r  rz  r  r  r  )r  r  r  Optional[tl.tensor]rf   r  r  r   r  rH  r  rH  r  rH  r  r   r*   r  r  r  )
r  r  r  rH  r  rH  r*   r  r  r  )r  r  r
  r  r*   r  r  r  )r  r  r  r  r  r  r  rH  r  rH  r*   r  r  r  )r  r  r  r  r  r  r  rH  r  rH  r*   r  r  r  )r  r  r  r  r  r  r  rH  r*   r  r  z&Tuple[tl.tensor, tl.tensor, tl.tensor])r  r  r  r  r  r  r  rH  r  rH  r*   r  r  r  )rV   r  rW   r  rY  r  r:  r  rZ  r   r[  r  r*   r  r  r  )
rf  r  r   r  r   r  r*   r  r  r  )rq  Sequence[tl.tensor]r)   r   r*   r  r  Tuple[tl.tensor, ...])
rq  r  r)   r   r  r   r*   r  r  r  )re   r  r  r   r*   r  r  r  )r   r  r  r  r  r  )r*   r  r  r  )
r  rH  r  zList[tl.tensor]r  r   r*   r  r  r  )r  r  r  rH  r  rH  r  r   r*   r  r  r  )T)r  r  r*   r  r  r  )l
__future__r   typingr   r   r   r   r   _C.libtritonr
    r   r$   r   r   	Exceptionr   r+   r.   r<   rJ   rP   r^   ri   rp   ru   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rk   r   r   r   r   r   r   r   r   r  r  r  r  r  r!  r,  r1  r;  rE  rR   rX  r   rU   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r%  r*  r.  r2  r;  rc  r  ri  rz  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   <module>r     s   " ; ;   CLF	 FDF@  ,P	< ejGK,16Q":04	4R4:&465$5$	]"+5:UQ
P
Q
'&"R
R
Q#$)4444	4	4"D"(.H[MK2
IL$2tpZ( 04o5,o58Ao5n	 		 
w,7=tnn.1nDGnVZnn!*n ',5g
:)qXWW)3W8AW,j(27]6$)N$)No#(18XX&0X5>X@Z& u.o(o-Bo2n8KeU0H$`NNr   