
    wg{                     T   d dl Z ddlmZmZ ddlmZ ddlmZ d Z edd i       ed	d
 i      ed	ej                  fd                     Z	 edd i       ed	d i      ed	ej                  fd                     Z
 G d de j                  j                        Zej                  Zy)    N   )
heuristicsjit)languagenext_power_of_2c                     | dk  ry| dk  ryy)Ni      i           )Ns    ]/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/triton/ops/cross_entropy.py	num_warpsr      s    4x	
T    r   c                     t        | d         S Nr   r   nargss    r   <lambda>r          	%*(= r   BLOCKc                     t        | d         S r   r   r   s    r   r   r          OE#J$? r   c                    t        j                  d      }t        j                  d|      }t        j                  ||z         }| ||z  z   |z   } |||z  z   |z   }	|||z  z   |z   }
t        j                  | ||k  t	        d             }|j                  t         j                        }|t        j                  |d      z
  }t        j                  t        j                  t        j                  |      d            |z
  }t        j                  |	|||k         t        j                          t        j                  |
      }t        j                  ||z   |       y Nr   inf)maskother)r   )tl
program_idarangeloadfloattofloat32maxlogsumexpstoredebug_barrier)LOGITSPROBSIDXLOSSr   r   rowcolsidx
WRIT_PROBS
READ_PROBSlogitsprobss                r   _forwardr9      s    --
C99QD
''#)
CcAg$Fq4'Jq3&JWWV$(5<-@FYYrzz"FbffVQ''FFF266"&&.!,-6EHHZTAX. GGJEHHTCZr   c                     t        | d         S r   r   r   s    r   r   r   )   r   r   c                     t        | d         S r   r   r   s    r   r   r   *   r   r   c                    t        j                  d      }t        j                  d|      }t        j                  ||z         }| ||z  z   |z   } t        j                  | ||k  t	        d             }t        j
                  |j                  t         j                              }||k(  }	t        j                  ||z         }
||	z
  |
z  }t        j                  | |j                  | j                  j                        ||k         y r   )r!   r"   r#   r$   r%   r+   r&   r'   r,   dtype
element_ty)r/   r0   DPROBSr   r   r2   r3   r4   r8   deltadoutdins               r   	_backwardrC   )   s     --
C99QD
''#)
CC!GOd"E WWUu>>EFF588BJJ'(ECKE776C< D5=D
 CHHUCFF5;;112Br   c                   ,    e Zd Zed        Zed        Zy)_cross_entropyc                 T  	 |j                   t        j                  k(  sJ d       j                  j                   }}j                  d   	t        j
                  |||      }t        j
                  ||      }	fd}t        |   |||	       |j                  ||       |S )Nz(Indices are expected to be of type long.)r=   devicec                 ,    j                         z  fS Nnumel)optr7   n_colss    r   r   z(_cross_entropy.forward.<locals>.<lambda>I   s    FLLNf47 r   )r=   torchint64rH   shape
empty_liker9   save_for_backward)
clsctxr7   indicesrH   r=   resultneg_logprobsgridrN   s
     `      @r   forwardz_cross_entropy.forward?   s     ,Y/YY,v||b!!!'vF''eFK7v|WffElG4r   c                 x    |j                   \  }j                  d   fd}t        |   ||       dfS )a  We know d(-log(p[i])/dlogit[k] = -id_mat[i,k] + p[k]
        so we initialize the gradient as neg_logprobs, so we can just exponentiate
        to get p[k], which is most of what we need...  neg_logprobs will be
        modified in place to become the gradient we want
        rG   c                 ,    j                         z  fS rJ   rK   )rM   rN   rX   s    r   r   z)_cross_entropy.backward.<locals>.<lambda>[   s    L..0F:= r   N)saved_tensorsrQ   rC   )rT   rU   dneg_logprobsrV   rY   rN   rX   s        @@r   backwardz_cross_entropy.backwardO   sI     !$ 1 1g ##B'=$g}fET!!r   N)__name__
__module____qualname__classmethodrZ   r_   r   r   r   rE   rE   =   s(      " "r   rE   )rO    r   r   r   r!   r   r   	constexprr9   rC   autogradFunctionrE   applycross_entropyr   r   r   <module>rj      s        [=>?W?@A     B @ , [=>?W?@ACBLL C  B @C" "U^^,,  "F $$r   