
    ɯwg                        d dl mZmZmZ d dlZd dlZd dlmZ d dlm	Z	 ddl
mZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZmZ ddlmZ 	 	 d-d	d
dd
dd
dee   dee   dee   dededeee      fdZ	 	 	 	 	 	 d.dZ  G d de      Z! G d de      Z" G d de      Z# G d de      Z$ G d de      Z% G d d e      Z& G d! d"e      Z' G d# d$e      Z( G d% d&e      Z) G d' d(e      Z* G d) d*e      Z+ G d+ d,e      Z,y)/    )AnyListOptionalN)make_channels_last_strides_for
OrderedSet   )ExternKernelAllocFixedLayoutFlexibleLayoutir_node_to_tensorIRNode is_contiguous_storage_and_layoutLayoutmay_convert_to_optionalMultiOutputMultiOutputLayoutMutationOutput
NoneLayout	TensorBox)convert_shape_to_inductorpad_listlike)Vxr   weightbiaspaddingstridedilationgroups
transposedoutput_paddingc
                    d }
d }|j                          |j                          ||j                          t        j                  j                  5  t	        |d      }t	        |d      }t        |j                               dz
  }dt        |      cxk  r|k  sJ  J dt        |      cxk  r|k  sJ  J dt        |      cxk  r|k  sJ  J t        ||      }t        ||      }t        ||      }|	t        dg|      }	n%dt        |	      cxk  r|k  sJ  J t        |	|      }	t        |t        t        j                  j                  j                  f      sJ |r( |||      }|j                         } |
||||	|||      }nR|t	        |d      n|}t        j                  j                   j#                  ||||||||	|	      }|j                         }dgt%        t'        t)        dt        |      dz                     z   }t        |      g|z   }ddd       | j+                  |      }t-        d	 D               }|r!t/        |      rt1        j2                  |      }nt5        |      }|j7                         j8                  d
k(  r|j7                         j8                  d
k(  sJ ||g}t;        |j7                         |j=                         t?        |      t?        |            }||||g}|r|jA                  d|	       ||jC                  |       n|jA                  d|       ||||fS # 1 sw Y   xY w)au  
    This function is a helper function to prepare inputs, layout and constant args
    for convolution post-op fusion's create function, including deciding the output
    layout (channels first or channels last), realizing inputs and make them etc. The
    function only supports the CPU device since conv post-op fusion kernel is only
    supported on CPU right now.
    c                    t        |       t        |      k(  sJ d       t        |       }|dkD  sJ d       d}d}	g }
|
j                  | |          |
j                  ||	   |z         t        d|      D ]P  }||   dz
  ||dz
     z  dz   }| |   dz
  ||dz
     z  ||dz
     dz  z
  |z   ||dz
     z   }|
j                  |       R t        t	        t
        |
            S )NzExpect input dim == weight dim   zExpect input dim > 2r   r	   )lenappendrangelistmapint)output_sizeweight_sizer   r"   r   r   r    dim	BATCH_DIMWEIGHT_INPUT_CHANNELS_DIM
input_sizedkernelinput_size_ds                 ^/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/torch/_inductor/mkldnn_ir.py_conv_input_sizez<_prepare_convolution_fusion_create.<locals>._conv_input_size2   s!    ;3{#33U5UU3+Qw...w	$%!
+i01+&?@6IJq# 	,A!!nq(HQUO;a?FQ!#va!e}41q5>A%' !Q'(  l+	, CZ())    c                 L   | j                         }t        |      }|dkD  sJ d       |dkD  rWg }|j                  |d   |z         |j                  |d   |z         t        d|      D ]  }|j                  ||           |S | j	                  dd      j                         }|S )Nr%   zExpect weight dim > 2r	   r   )sizer&   r'   r(   	transpose)prepacked_weightr    prepacked_weight_sizer.   r-   r2   s         r5   _original_deconv_weight_sizezH_prepare_convolution_fusion_create.<locals>._original_deconv_weight_sizeM   s     !1 5 5 7'(Qw///wA:K4Q7&@A4Q7&@A1c] =""#8#;<=  +44Q:??AKr7   NT)guard_shaper%   r   r	   c              3   <   K   | ]  }t        |t                y wN)
isinstancer+   ).0is     r5   	<genexpr>z5_prepare_convolution_fusion_create.<locals>.<genexpr>   s     GAZ3/Gs   cpu)"realizer   graph	fake_moder   r&   r9   r   rA   r+   sympycorenumbersIntegertorchopsatenconvolutionr)   reversedr(   require_stride_orderallr   r   contiguous_stridesr   
get_devicetyper   	get_dtyper   insertr'   )clsr   r   r   r   r   r   r    r!   r"   r6   r=   x_fakeweight_fakedimsr-   r1   r,   	bias_fakeoutputreq_stride_orderdynamic_shapesoutput_strideinputskernel_layoutconstant_argss                             r5   "_prepare_convolution_fusion_createre      s@   **6" IIK
NN	
		 2F"1$7'DA6;;=!A%3w<'4'''''3x=(D(((((3v;&$&&&&&w-$/fd+!)1#t4Ns>*2d22222).$?N&3

(:(:(B(B"CDDD 7{FKKJ*K >B=M!$D9SW  YY^^//
F !++-K3huQFa/H&I!JJ 0125EEe2Fh 	  $45A G+GGGN:1=&99+F6{C<<>%'F,=,=,?,D,D,MMM[F		!+.!-0	M fh7MQ/dQ%=-1AAAi2F 2Fs   G
L::Mc           
         |j                          |j                          ||j                          |j                         ^ }}|j                         \  }}t        |      |gz   }t        t        t	        t        |j                                                 }| j                  ||      }|j                         j                  dk(  r|j                         j                  dk(  sJ ||g}	t        j                  |      }
t        |j                         |j                         ||
      }g }||	j                  |       n|j                  d|       |	|||fS )z
    This function is a helper function to prepare inputs, layout and constant args
    for linear post-op fusion's create function. The function only supports the CPU device
    since linear post-op fusion kernel is only supported on CPU right now.
    rE   r   )rF   get_sizer)   rQ   r(   r&   rR   rU   rV   r   rT   r   rW   r'   rX   )rY   r   r   r   m_ocr,   r_   rb   ra   rc   rd   s                r5   _prepare_linear_fusion_createrk      s-    IIK
NNJJLEQ OOEArq'RD.KHU3qzz|+<%=>?  $45A<<>%'F,=,=,?,D,D,MMM[F"55kBM			M  "MdQ%=-1AAAr7   c                   |     e Zd Z	 d	 d fdZd Zedddddddee   dee   d	ee   d
edeee	      fd       Z
 xZS )ConvolutionUnaryc                     t         |   |||d t        j                  j                  j
                  j                         d| _        y )Nop_overloada  
            at::Tensor(
                const at::Tensor& input_t,
                const at::Tensor& weight_t,
                const std::optional<at::Tensor>& bias_opt,
                at::IntArrayRef padding,
                at::IntArrayRef stride,
                at::IntArrayRef dilation,
                int64_t groups,
                c10::string_view attr,
                torch::List<std::optional<at::Scalar>> scalars,
                std::optional<c10::string_view> algorithm))super__init__rM   rN   mkldnn_convolution_pointwisedefaultcpp_op_schemaselflayoutrb   rd   	__class__s       r5   rr   zConvolutionUnary.__init__   sF     			((??GG 	 	
>r7   c                 Z   |j                  | j                         | j                  | j                  | j	                         | j
                  | j                  | j                  g | j                  | j                         t        | j                  t              r| j                  |       y y )Nrp   raw_args)6generate_extern_kernel_alloc_and_find_schema_if_neededget_namepython_kernel_namecpp_kernel_namecodegen_argsrv   cpp_kernel_keyrp   rb   rd   rA   ry   r   codegen_size_assertsrx   wrappers     r5   codegenzConvolutionUnary.codegen   s    FFMMO##  ((8t{{8T%7%78 	G 		
 dkk6*%%g. +r7   r   r   r   r   padding_stride_	dilation_r    scalarsc           
      n    t        | |||||||      \  }}}}||t        |	      |
gz   }t        |||      S )Nry   rb   rd   )re   r   rm   )rY   r   r   r   r   r   r   r    attrr   	algorithmrb   rd   rc   ri   s                  r5   createzConvolutionUnary.create  s`     5WFD(GY5
1q &#G,)
 

   '
 	
r7    returnN__name__
__module____qualname__rr   r   classmethodr   r+   r   r   r   __classcell__rz   s   @r5   rm   rm      s    
 	>
 
>4/ 

 
 	

 s)
 c
 9
 
 $s)$
 
r7   rm   c                        e Zd Z	 	 d	 d fdZd Zedddddddddee   d	ee   d
ee   dedede	e
   de	e   de	ee      de	e   fd       Z xZS )ConvolutionBinaryc                     t         |   |||d t        j                  j                  j
                  j                         d| _        || _        y )Nro   a  
            at::Tensor(
                const at::Tensor& input_t,
                const at::Tensor& other_t,
                const at::Tensor& weight_t,
                const std::optional<at::Tensor>& bias_opt,
                at::IntArrayRef padding,
                at::IntArrayRef stride,
                at::IntArrayRef dilation,
                int64_t groups,
                c10::string_view binary_attr,
                std::optional<at::Scalar> alpha,
                std::optional<c10::string_view> unary_attr,
                torch::List<std::optional<at::Scalar>> unary_scalars,
                std::optional<c10::string_view> unary_algorithm))	rq   rr   rM   rN   rs   rt   binaryrv   cpp_constant_args)rx   ry   rb   rd   r   rz   s        r5   rr   zConvolutionBinary.__init__,  sQ     			((??FF 	 	
D "3r7   c                 n   |j                  | j                         | j                  | j                  | j	                         | j
                  | j                  | j                  | j                  g | j                  | j                  	       t        | j                  t              r| j                  |       y y r@   )r~   r   r   r   r   rv   r   cpp_kernel_overload_namerp   rb   rd   rA   ry   r   r   r   s     r5   r   zConvolutionBinary.codegenK  s    FFMMO##  ))/dkk/D../
	
 dkk6*%%g. +r7   r   r   otherr   r   r   r   r   r    binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmc           
          t        | |||||||      \  }}}}| j                  ||      }|j                  d|       ||	|
|t        |      |gz   }t	        |||      S )Nr	   r   )re   rR   rX   r   r   )rY   r   r   r   r   r   r   r   r    r   r   r   r   r   rb   rd   rc   r_   s                     r5   r   zConvolutionBinary.createZ  s    , /FD(GY
	
 ((0@Aa%#M2)
 
 ! '
 	
r7   )r   r   r   )r   r   r   rr   r   r   r   r+   strr   floatr   r   r   r   s   @r5   r   r   +  s    
 3 
3>/ %
%
 %
 	%

 %
 s)%
 c%
 9%
 %
 %
 uo%
 SM%
  S	*%
 "#%
 %
r7   r   c                        e Zd Z	 d	 d fdZd Zdeej                     fdZe	ddddddd	dd
e
e   de
e   de
e   dededee   dee   dee
e      dee   fd       Z xZS )ConvolutionBinaryInplacer   c                 f   |d   |d   g|dd  z   }t         |   |||d t        j                  j                  j
                  j                         d| _        t        t        |d   j                               |d   |       t        t        |d   j                               |d   |       g| _        y )Nr	   r   r%   ro   a  
            at::Tensor&(
                at::Tensor& other_t,
                const at::Tensor& input_t,
                const at::Tensor& weight_t,
                const std::optional<at::Tensor>& bias_opt,
                at::IntArrayRef padding,
                at::IntArrayRef stride,
                at::IntArrayRef dilation,
                int64_t groups,
                c10::string_view binary_attr,
                std::optional<at::Scalar> alpha,
                std::optional<c10::string_view> unary_attr,
                torch::List<std::optional<at::Scalar>> unary_scalars,
                std::optional<c10::string_view> unary_algorithm))rq   rr   rM   rN   rs   _convolution_pointwise_r   rv   r   r   rU   mutation_outputs)rx   rc   rb   rd   reordered_inputsrz   s        r5   rr   z!ConvolutionBinaryInplace.__init__  s     #1Ivay1F12J>		((@@GG 	 	
D" :fQi&:&:&<=vay$O:fQi&:&:&<=vay$O!
r7   c                    |j                  | j                         | j                  | j                  | j	                         | j
                  | j                  | j                  | j                  g | j                  | j                  	       y r@   )r~   r   r   r   r   rv   r   r   rp   rb   rd   r   s     r5   r   z ConvolutionBinaryInplace.codegen  sr    FFMMO##  ))/dkk/D../
	
r7   c                     t               S r@   r   rx   s    r5   get_unbacked_symbol_defsz1ConvolutionBinaryInplace.get_unbacked_symbol_defs  
    |r7   r   r   r   r   r   r   r   r   r    r   r   r   r   r   c           
         t        | |||||||      \  }}}}| j                  ||      }|j                  d|       ||	|
|t        |      |gz   }t	        t        |d   j                               ||      }|j                  d   S )Nr	   )rc   rb   rd   r   )re   rR   rX   r   r   r   rU   rb   )rY   r   r   r   r   r   r   r   r    r   r   r   r   r   rb   rd   ri   r_   packeds                      r5   r   zConvolutionBinaryInplace.create  s    , /FD(GY
	
 ((0@Aa%#M2)
 
 *$VAY%9%9%;<'
 }}Qr7   r   r   )r   r   r   rr   r   r   rI   Symbolr   r   r   r+   r   r   r   r   r   r   r   s   @r5   r   r     s    
 	$

 
$
L
*U\\*B  ) )  )  	) 
 )  s))  c)  9)  )  )  uo)  SM)   S	*)  "#)  ) r7   r   c                        e Zd Z	 d	 d fdZd Zedddddddee   dee   d	ee   d
ee   dedeee	      fd       Z
 xZS )ConvolutionTransposeUnaryc                     t         |   |||d t        j                  j                  j
                  j                         d| _        y )Nro   a  
            at::Tensor(
                const at::Tensor& input_t,
                const at::Tensor& weight_t,
                const std::optional<at::Tensor>& bias_opt,
                at::IntArrayRef padding,
                at::IntArrayRef output_padding,
                at::IntArrayRef stride,
                at::IntArrayRef dilation,
                int64_t groups,
                c10::string_view attr,
                torch::List<std::optional<at::Scalar>> scalars,
                std::optional<c10::string_view> algorithm))rq   rr   rM   rN   rs    _convolution_transpose_pointwiseru   rv   rw   s       r5   rr   z"ConvolutionTransposeUnary.__init__  sF     			((IIQQ 	 	
>r7   c                     |j                  | j                         | j                  | j                  | j	                         | j
                  | j                         y r@   r~   r   r   r   r   rv   r   r   s     r5   r   z!ConvolutionTransposeUnary.codegen  I    FFMMO##  	
r7   r   r   r   r   r   output_padding_r   r   groups_r   c                 v    d}t        | |||||||||
      \  }}}}||	t        |
      |gz   }t        |||      S )NTr   )re   r   r   )rY   r   r   r   r   r   r   r   r   r   r   r   r!   rb   rd   rc   ri   s                    r5   r   z ConvolutionTransposeUnary.create  sy     
 /
	
 &#G,)
 

 ) '
 	
r7   r   r   r   r   s   @r5   r   r     s    
 	>
 
>6
 )
)
 )
 	)

 s))
 c)
 c)
 9)
 )
 $s)$)
 )
r7   r   c                        e Zd Z	 d	 d fdZd Zedddededdddd	dd
ddee   dee   dee   dededefd       Z	 xZ
S )QConvPointWisePT2Ec                     t        |      dk(  | _        t        |   |||dt        j
                  j                  j                  j                         d| _	        y)a  
        if bias is not None
            - inputs = [x, w, b, weight_scale, weight_zp]
            - const_args is: [stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, weight_scale, weight_zp]
            - const_args is: [bias, stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
           Nro   a  
            at::Tensor(
                at::Tensor act,
                double act_scale,
                int64_t act_zero_point,
                at::Tensor weight,
                at::Tensor weight_scales,
                at::Tensor weight_zero_points,
                std::optional<at::Tensor> bias,
                torch::List<int64_t> stride,
                torch::List<int64_t> padding,
                torch::List<int64_t> dilation,
                int64_t groups,
                double output_scale,
                int64_t output_zero_point,
                std::optional<c10::ScalarType> output_dtype,
                c10::string_view attr,
                torch::List<std::optional<at::Scalar>> scalars,
                std::optional<c10::string_view> algorithm))
r&   has_biasrq   rr   rM   rN   onednnqconv2d_pointwiseru   rv   rw   s       r5   rr   zQConvPointWisePT2E.__init__;  sV      Fq(		((::BB 	 	
>r7   c                    | j                   D cg c]  }|j                          }}g d}| j                  s|j                  dd       t	        | j                  |            }|d   }| j                   d   }|d   }| j                   d   }| j                  r|d   n|d   }	| j                  r| j                   d   n| j                  d   }
|d   |d   }}| j                   d   | j                   d   }}|d d \  }}| j                  d d \  }}|dd  \
  }}}}}}}}}}| j                  dd  \
  }}}} }!}"}#}$}%}&|||||||	||||||||||f}'|||||||
|||| |!|"|#|$|%|&f}(|j                  | j                         | j                  | j                  |'| j                  | j                  | j                  |(	       t        | j                  t               r| j#                  |       y y c c}w )
N)x_scalex_zero_pointr   r   r   r    output_scaleoutput_zero_pointoutput_dtyper   r   r   r%   r   r   r	   r|   )rb   codegen_referencer   rX   r)   codegen_const_argsrd   r~   r   r   r   rv   r   rp   rA   ry   r   r   ))rx   r   r   argsconst_arg_names
const_argsx_rawpacked_weightpacked_weight_rawr   bias_raww_scalew_zpw_scale_raww_zp_rawr   x_zpx_scale_rawx_zp_rawr   r   r   r    o_scaleo_zpr   r   r   r   
stride_rawpadding_rawdilation_raw
groups_rawo_scale_rawo_zp_rawoutput_dtype_rawunary_attr_rawunary_scalars_rawunary_algorithm_rawr   r}   s)                                            r5   r   zQConvPointWisePT2E.codegeng  so    04{{;!##%;;
 }}""1f-$11/BC
GAQ KKN--tAwZ]%)]]4;;q>8J8J18MR$r( $BRX rN	

 r"	
 st	
 st$	
 #
( #
& 	FFMMO##  (( 	G 		
 dkk6*%%g. +W <s   G$qxr   r   r   qwr   w_zero_pointr   r   r   r   r    r   r   c                 |   d}d }t        | ||||	||
|||
      \  }}}}||d   |d   c|d<   |d<   n|d   |d   c|d<   |d<   |j                          |j                          |||gz   }||g|z   ||||t        |      |gz   }|J |t        j                  t        j
                  fv r||_        t        |||      S )NFr%   r	   r   r   )re   rF   r   rM   float32bfloat16dtyper   )rY   r   r   r   r   r   r   r   r   r   r   r    r   r   r   r   r   r   r!   r"   rb   rd   rc   ri   s                           r5   r   zQConvPointWisePT2E.create  s/   * 
4V5
1q <1>q1A=QRCS.M!mA.1>q1A=QRCS.M!mA.7L11  	 !'0 	  '''EMM5>>:: #/M! '
 	
r7   r   r   )r   r   r   rr   r   r   r   r+   r   r   r   r   s   @r5   r   r   :  s    
 	*>
 
*>Xo/b F
F
 F
 	F

 F
 F
 "F
 F
 S	F
 cF
 s)F
 F
 F
 F
 F
r7   r   c                        e Zd Z	 d	 d fdZd Zd Zdeej                     fdZ	e
ddddd	dd
ddee   dee   dee   deddddfd       Z xZS )QConvPointWiseBinaryPT2Er   c                     t        |      dk(  | _        | j                  rdnd| _        t        |   |||dt
        j                  j                  j                  j                         d| _
        y)av  
        Needs input/weight/output qparams
        if bias is not None
            - inputs = [x, w, b, accum, w_scale, w_zp]
            - const_args = [stride, padding, dilation, groups, x_scale, x_zp, accum_scale, accum_zp, o_scale, o_zp,
            fp32_output, binary_attr, aplha, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, accum, w_scale, w_zp]
            - const_args = const_args is: [bias, stride, padding, dilation, groups, x_scale, x_zp, accum_scale,
            accum_zp, o_scale, o_zp, fp32_output, binary_attr, aplha, unary_attr, unary_scalars, unary_algorithm]
              r%   Nro   a  
            at::Tensor(
                at::Tensor act,
                double act_scale,
                int64_t act_zero_point,
                at::Tensor accum,
                double accum_scale,
                int64_t accum_zero_point,
                at::Tensor weight,
                at::Tensor weight_scales,
                at::Tensor weight_zero_points,
                std::optional<at::Tensor> bias,
                torch::List<int64_t> stride,
                torch::List<int64_t> padding,
                torch::List<int64_t> dilation,
                int64_t groups,
                double output_scale,
                int64_t output_zero_point,
                std::optional<c10::ScalarType> output_dtype,
                c10::string_view binary_attr,
                std::optional<at::Scalar> alpha,
                std::optional<c10::string_view> attr,
                torch::List<std::optional<at::Scalar>> scalars,
                std::optional<c10::string_view> algorithm))r&   r   idx_for_inplace_sumrq   rr   rM   rN   r   r   r   rv   rw   s       r5   rr   z!QConvPointWiseBinaryPT2E.__init__#  sf    " Fq((,11 		((::AA 	 	
>r7   c                 4   | j                   D cg c]  }|j                          }}g d}| j                  s|j                  dd       t	        | j                  |            }|d   }| j                   d   }|d   }| j                   d   }| j                  r|d   n|d   }	| j                  r| j                   d   n| j                  d   }
|d   |d   |d	   }}}| j                   d   | j                   d   | j                   d	   }}}|d d \  }}}}| j                  d d \  }}}}|d
d  \  }}}}}}}} }!}"}#}$| j                  d
d  \  }%}&}'}(})}*}+},}-}.}/}0||||||||||	|||||||| |!|"|#|$f}1||||||||||
|%|&|'|(|)|*|+|,|-|.|/|0f}2|j                  | j                         | j                  | j                  |1| j                  | j                  | j                  | j                  |2	       t        | j                   t"              r| j%                  |       y y c c}w )N)r   r   accum_scaleaccum_zero_pointr   r   r   r    r   r   r   r   alphar   r   r      r   r   r	   r%   r   r   r|   )rb   r   r   rX   r)   r   rd   r~   r   r   r   rv   r   r   rp   rA   ry   r   r   )3rx   r   r   r   r   r   r   r   r   r   r   accumr   r   	accum_rawr   r   r   r   r   accum_zpr   r   accum_scale_rawaccum_zp_rawr   r   r   r    r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   binary_attr_raw	alpha_rawr   r   r   	conv_argsr}   s3                                                      r5   r   z QConvPointWiseBinaryPT2E.codegenV  s    04{{;!##%;;
$ }}""1f-$11/BC
GAQ KKN--tAwZ]%)]]4;;q>8J8J18M#Bxb48wKKOKKOKKO !);	 rN	
 r"	
 st	
 st$	
 -
	2 -
0 	FFMMO##  ))(( 	G 
	
 dkk6*%%g. +M <s   Hc                 R    | j                   | j                     j                         gS r@   )rb   r   r   r   s    r5   get_mutation_namesz+QConvPointWiseBinaryPT2E.get_mutation_names  s#    D445>>@AAr7   c                     t               S r@   r   r   s    r5   r   z1QConvPointWiseBinaryPT2E.get_unbacked_symbol_defs  r   r7   r   r   qaccumr   r   r   r   r   r    r   r   c                 B   d}d }t        | |||
||||||
      \  }}}}| j                  ||      }|j                  |       |
|d   |d   c|d<   |d<   n|d   |d   c|d<   |d<   |j                          |	j                          |||	gz   }||||g|z   ||||||t	        |      |gz   }|dk(  sJ d       t
        j                  j                  |j                                t        t        |j                               ||      }|j                  |j                     S )NFr%   r	   r   sumzCFor now, only post op sum is supported in QConvPointWiseBinaryPT2E.r   )re   rR   r'   rF   r   r   rG   mark_buffer_mutatedr   r   r   rU   rb   r   )rY   r   r   r   r  r   r   r   r   r   r   r   r   r   r    r   r   r   r   r   r   r   r   r!   r"   rb   rd   rc   r_   r   s                                 r5   r   zQConvPointWiseBinaryPT2E.create  s   4 
 /
	
 ))&2BCf <1>q1A=QRCS.M!mA.1>q1A=QRCS.M!mA.7L11  	  !'6	 	* 5 	QP	Q  	
##FOO$56)f//12'
 }}V7788r7   r   r   )r   r   r   rr   r   r  r   rI   r   r   r   r   r+   r   r   r   s   @r5   r   r   "  s    
 	1>
 
1>fJ/XB*U\\*B  Y9Y9
 Y9 Y9 Y9 S	Y9 cY9 s)Y9 Y9  "!Y9" '#Y9 Y9r7   r   c                   <     e Zd Z	 d	 d fdZd Zed        Z xZS )MKLPackedLinearc                     t         |   |||d t        j                  j                  j
                  j                         d| _        y )Nro   a  
            at::Tensor(
                const at::Tensor& self,
                const at::Tensor& mkl_weight_t,
                const at::Tensor& origin_weight_t,
                const std::optional<at::Tensor>& bias_opt,
                const int64_t prepack_batch_size))rq   rr   rM   rN   mkl_mkl_linearru   rv   rw   s       r5   rr   zMKLPackedLinear.__init__F  sD     			1199 	 	
5r7   c                     |j                  | j                         | j                  | j                  | j	                         | j
                  | j                         y r@   r   r   s     r5   r   zMKLPackedLinear.codegen[  r   r7   c                    | j                  | j                  |            }| j                  | j                  |            }|j                         ^ }}|j                         \  }}t        |      |gz   }	t	        j
                  |	      }
|||g}|g}|||gz  }n|j                  dd        t        t        |j                         |j                         |	|
      ||      S )Nr   r   )require_stride1realize_inputrg   r)   r   rT   rX   r  r   rU   rW   )rY   r   packed_worig_wB
batch_sizerh   ri   rj   r,   ra   rb   rd   s                r5   r   zMKLPackedLinear.createe  s     1 1! 45$$S%6%6v%>?

A!A1gn&99+FXv&#=qcMF  D){M '
 	
r7   r   r   )r   r   r   rr   r   r   r   r   r   s   @r5   r  r  E  s0    
 	5
 
5*
 
 
r7   r  c                   B     e Zd Z	 d	 d fdZd Zed        Zd Z xZS )LinearUnaryc                     t         |   |||d t        j                  j                  j
                  j                         d| _        d| _        y )Nro   linear_pointwiseaL  
            at::Tensor(
                const at::Tensor& input_t,
                const at::Tensor& weight_t,
                const std::optional<at::Tensor>& bias_opt,
                c10::string_view attr,
                torch::List<std::optional<at::Scalar>> scalars,
                std::optional<c10::string_view> algorithm))	rq   rr   rM   rN   rs   _linear_pointwiseru   r   rv   rw   s       r5   rr   zLinearUnary.__init__~  sP     			((::BB 	 	
 1>r7   c                     |j                  | j                         | j                  | j                  | j	                         | j
                  | j                         y r@   r   r   s     r5   r   zLinearUnary.codegen  r   r7   c           	         | j                  | j                  |            }| j                  | j                  |            }|j                         ^ }}|j                         \  }	}||g}
||r|ndg|g}|2| j                  | j                  |            }|
j                  |       n|j	                  dd        t        t        |j                         |j                         t        |      |	gz         |
|      S )Nr   r   devicer   r9   r   )
require_contiguousr  rg   r'   rX   r!  r   rU   rW   r)   )rY   r   wr  r   r   r   rh   icrj   rb   rd   s               r5   r   zLinearUnary.create  s    ""3#4#4Q#78""3#4#4Q#78BBQ'wtYG=&&s'8'8';<AMM!  D)!||~kkm!Wt^
 '
 	
r7   c                      y r@   r   r   s    r5   apply_constraintzLinearUnary.apply_constraint      r7   r   r   )	r   r   r   rr   r   r   r   r-  r   r   s   @r5   r!  r!  }  s5    
 	>
 
>.
 
 
0r7   r!  c                   F     e Zd ZdZ	 d	 d fdZd Zed        Zd Z xZ	S )LinearBinaryz)torch.ops.mkldnn._linear_pointwise.binaryc                     t         |   |||d t        j                  j                  j
                  j                         d| _        y )Nro   a  
            at::Tensor(
                const at::Tensor& input_t,
                const at::Tensor& other_t,
                const at::Tensor& weight_t,
                const std::optional<at::Tensor>& bias_opt,
                c10::string_view attr)
        )rq   rr   rM   rN   rs   r$  r   rv   rw   s       r5   rr   zLinearBinary.__init__  sF     			((::AA 	 	
r7   c           	          |j                  | j                         | j                  | j                  | j	                         | j
                  | j                  | j                         y r@   )r~   r   r   r   r   rv   r   r   r   s     r5   r   zLinearBinary.codegen  sR    FFMMO##  ))	
r7   c           	      0   | j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }|j                         ^ }}|j                         \  }}|||g}	|g}
|2| j                  | j                  |            }|	j                  |       n|
j	                  d|       t        t        |j                         |j                         t        |      |gz         |	|
      S )Nr   r'  r   )
r)  r  rg   r'   rX   r0  r   rU   rW   r)   )rY   r   yr*  r  r   rh   r+  rj   rb   rd   s              r5   r   zLinearBinary.create  s    ""3#4#4Q#78""3#4#4Q#78""3#4#4Q#78BBQ=&&s'8'8';<AMM!  A&!||~kkm!Wt^
 '
 	
r7   c                      y r@   r   r   s    r5   r-  zLinearBinary.apply_constraint  r.  r7   r   r   )
r   r   r   r3   rr   r   r   r   r-  r   r   s   @r5   r0  r0    s:    8F 	
 
,	
 
 
4r7   r0  c                   f     e Zd Z	 	 	 d	 d fdZd Zedddededdddd	dd
ddedefd       Z xZ	S )QLinearPointwisePT2Ec                 0   || _         || _        t        |   |||d|r.t        j
                  j                  j                  j                  n-t        j
                  j                  j                  j                         |rdnd\  }}d| d| d| _
        y)a  
        if bias is not None
            - inputs = [x, w, b, weight_scale, weight_zp]
            - const_args is: [x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, weight_scale, weight_zp]
            - const_args is: [bias, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        Nro   
at::Tensorr:  doubleint64_tI
            at::Tensor(
                at::Tensor act,
                 act_scale,
                a   act_zero_point,
                at::Tensor weight,
                at::Tensor weight_scales,
                at::Tensor weight_zero_points,
                std::optional<at::Tensor> bias,
                double output_scale,
                int64_t output_zero_point,
                std::optional<c10::ScalarType> output_dtype,
                c10::string_view post_op_name,
                torch::List<std::optional<at::Scalar>> post_op_args,
                c10::string_view post_op_algorithm))r   x_scale_zp_are_tensorsrq   rr   rM   rN   r   qlinear_pointwisetensorru   rv   	rx   ry   rb   rd   r   r@  x_scale_type_strx_zp_type_strrz   s	           r5   rr   zQLinearPointwisePT2E.__init__   s    $ !&<#% 		((::AA!!33;; 	 	
 & )& 	(-
" "" # 
 4	7r7   c                 R   | j                   D cg c]  }|j                          }}g }|j                  | j                                |d   }| j                   d   }|d   }| j                   d   }| j                  r|d   n|d   }| j                  r| j                   d   n| j
                  d   }	|d   |d   }}
| j                   d   | j                   d   }}| j                  r[t        |      dk\  sJ |d   |d   }}| j                   d   | j                   d   }}|d	d  \  }}}}}}| j
                  d	d  \  }}}}}}n6t        |      d
k\  sJ |dd  \  }}}}}}}}| j
                  dd  \  }}}}}}}}|||||
||||||||f}|||||||	||||||f}|j                  | j                         | j                  | j                  || j                  | j                  | j                  | j                  |	       t!        | j"                  t$              r| j'                  |       y y c c}w )Nr   r	   r%   r   r   r  r  i   irb   r   extendr   r   rd   r@  r&   r~   r   r   r   rv   r   r   rp   rA   ry   r   r   ) rx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r}   s                                    r5   r   zQLinearPointwisePT2E.codegen2  s    04{{;!##%;;
$1134GAQ KKN--tAwZ]%)]]4;;q>8J8J18MR$r( $BRX&&t9>!> Hd2hTG$(KKOT[[_K 23 ""23' !# z?a''' 23	 ""23'	 !# 
  
 	FFMMO##  ))
	
 dkk6*%%g. +A <s   H$r   r   r   r   r   r   r   r   r   r   c                    t        | |||      \  }}}}t        |t              r:t        |t              r*|j                          |j                          |||gz   }d}n+t        |t              rt        |t
              sJ |||gz   }d}|j                          |j                          |||gz   }|||	|
|t        |      |gz   }|
J |
t        j                  t        j                  fv r|
|_
        t        ||||d u|      S )NTFry   rb   rd   r   r@  )rk   rA   r   rF   r   r+   r   rM   r   r   r   r7  )rY   r   r   r   r   r   r   r   r   r   r   post_op_namepost_op_argspost_op_algorithmrb   rd   rc   ri   r@  s                      r5   r   zQLinearPointwisePT2E.create  s+   " 5R	5
1q gy)jy.QOO  "w55F%)"gu-*\32OOO)Wl,CCM%*"7L11%#L1)
 
 '''EMM5>>:: #/M# '$&#9
 	
r7   r   TFr   )
r   r   r   rr   r   r   r   r+   r   r   r   s   @r5   r7  r7    s    
 $07 
07dd/L 8
8
 8
 	8

 8
 8
 "8
 8
 8
 8
 8
r7   r7  c                   p     e Zd Z	 	 	 d	 d fdZd Zd Zedddededdd	dd
ddddddedefd       Z	 xZ
S )QLinearPointwiseBinaryPT2Ec                 0   || _         || _        t        |   |||d|r.t        j
                  j                  j                  j                  n-t        j
                  j                  j                  j                         |rdnd\  }}d| d| d| _
        y)a  
        if bias is not None
            - inputs = [x, w, b, weight_scale, weight_zp, x2]
            - const_args is: [x_scale, x_zp, o_scale, o_zp,
              fp32_output, binary_attr, aplha, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, weight_scale, weight_zp, x2]
            - const_args is: [bias, x_scale, x_zp, o_scale, o_zp,
              fp32_output, binary_attr, aplha, unary_attr, unary_scalars, unary_algorithm]
        Nro   r9  r;  r>  r?  a   act_zero_point,
                at::Tensor weight,
                at::Tensor weight_scales,
                at::Tensor weight_zero_points,
                std::optional<at::Tensor> other,
                std::optional<at::Tensor> bias,
                double inv_output_scale,
                int64_t output_zero_point,
                std::optional<c10::ScalarType> output_dtype,
                double other_scale,
                int64_t other_zero_point,
                c10::string_view binary_post_op,
                double binary_alpha,
                c10::string_view unary_post_op,
                torch::List<std::optional<at::Scalar>> unary_post_op_args,
                c10::string_view unary_post_op_algorithm))r   r@  rq   rr   rM   rN   r   rA  binary_tensorr   rv   rC  s	           r5   rr   z#QLinearPointwiseBinaryPT2E.__init__  s    $ !&<#% 		((::HH!!33:: 	 	
 & )& 	(-
" "" #  :	=r7   c                    | j                   D cg c]  }|j                          }}g }|j                  | j                                |d   }| j                   d   }|d   }| j                   d   }| j                  r|d   n|d   }| j                  r| j                   d   n| j
                  d   }	|d   |d   |d   }}}
| j                   d   | j                   d   | j                   d   }}}| j                  rct        |      dk\  sJ |d   |d	   }}| j                   d   | j                   d	   }}|d
d  \
  }}}}}}}}}}| j
                  d
d  \
  }}} }!}"}#}$}%}&}'n>t        |      dk\  sJ |dd  \  }}}}}}}}}}}}| j
                  dd  \  }}}}} }!}"}#}$}%}&}'|||||
|||||||||||||f}(||||||||	||| |!|"|#|$|%|&|'f})|j                  | j                         | j                  | j                  |(| j                  | j                  | j                  | j                  |)	       t!        | j"                  t$              r| j'                  |       y y c c}w )Nr   r	   r%   r  r   r   r   rG  r   rH  r  rI  )*rx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	other_rawr   r   r   r   r   r   r   other_scaleother_zpr   r   r   r   r   r   r   r   other_scale_rawother_zp_rawr	  r
  r   r   r   r   r}   s*                                             r5   r   z"QLinearPointwiseBinaryPT2E.codegen  s    04{{;!##%;;
$1134GAQ KKN--tAwZ]%)]]4;;q>8J8J18M#Bxb48uKKOKKOKKO  )X
 &&t9>!> Hd2hTG$(KKOT[[_K 34  ""34( !# z?a''' 34  ""34( !# %
* %
( 	FFMMO##  ))
	
 dkk6*%%g. +} <s   Ic                 j    | j                   d   }|dk(  r| j                  d   j                         gS g S )NrV  r  r   )rd   rb   r   )rx   binary_post_ops     r5   r  z-QLinearPointwiseBinaryPT2E.get_mutation_names  s9    ++B/U"KKO,,.//Ir7   r   r   r   r   r   r   r   r   r   r   r   c                 2   t        | |||      \  }}}}t        |t              r:t        |t              r*|j                          |j                          |||gz   }d}n+t        |t              rt        |t
              sJ |||gz   }d}|j                          |j                          |||gz   }|dk(  r| j                  ||      }|j                  |       ||	|
||||||t        |      |g
z   }|dk(  ret        j                  j                  |j                                t        t        |j                               |||d u|      }|j                   d   S |J |t"        j$                  t"        j&                  fv r||_        t        ||||d u|      S )NTFr  rL  r   )rk   rA   r   rF   r   r+   rR   r'   r   r   rG   r  r   rR  r   rU   rb   rM   r   r   r   )rY   r   r   r   r   r   r   r   r   r   r   r   rX  rY  r]  r   unary_post_opunary_post_op_argsunary_post_op_algorithmrb   rd   rc   r_   r@  r   s                            r5   r   z!QLinearPointwiseBinaryPT2E.create  s   6 *	
	
 gy)jy.QOO  "w55F%)"gu-*\32OOO)Wl,CCM%*"7L11U",,U4DEEe%#$67#)
 
 U"GG''(89/!%"2"2"45+d*'=F ==$$'''EMM5>>:: #/M) '$&#9
 	
r7   rP  r   )r   r   r   rr   r   r  r   r   r+   r   r   r   s   @r5   rR  rR    s    
 $5= 
5=nB/H U
U
 U
 	U

 U
 U
 "U
 U
 U
 U
 U
 U
r7   rR  c            !       ~     e Zd Z	 d	 d fdZeddddddddddddd	dd
edee   dededededededef d       Z xZ	S )MkldnnRnnLayerc                     t         |   |||d t        j                  j                  j
                  j                         y )Nro   )rq   rr   rM   rN   rO   mkldnn_rnn_layerru   rw   s       r5   rr   zMkldnnRnnLayer.__init__  s:     			77?? 	 	
r7   r   r   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                    | j                  | j                  |            }|j                          | j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }|j                          | j                  | j                  |            }|j                          |j                         }t	        |      dk(  sJ d       |\  }}}|||g}|j                         }|j                         }g }|||||||g}||	|
||||||g	}t        t        |j                               ||      }d }|||g} |||      t        j                  |      t        j                  |      g}t        t        ||            D  cg c]D  \  }\  }} t        t        |j                         |j                         ||       |t        |fg      F }!}}} |!S c c} }}w )Nr   zExpect lstm input to be 3D)rb   rd   c                 V    t        |       dk(  sJ d       t        j                  |       S )Nr   zExpect output_shape to be 3D)r&   r   rT   )output_shapers  s     r5   get_strides_of_lstm_outputz9MkldnnRnnLayer.create.<locals>.get_strides_of_lstm_output?  s,    |$)I+II)!44\BBr7   )r  r  freeze_layoutrg   r&   rc  r   rU   r   rT   	enumeratezipr   r   rW   tuple)"rY   r   rf  rg  rh  ri  rj  rk  rl  rm  rn  ro  rp  rq  rr  rs  rt  r1   
seq_length
mini_batchrw  hy_shapecy_shaperesrb   rd   r   rx  output_sizesoutput_stridesrC   r,   ra   	output_irs"                                     r5   r   zMkldnnRnnLayer.create  sR   (  1 1! 45 	
  !2!22!67  !2!22!67  !2!22!67  !2!22!67  !2!22!67
  !2!22!67
ZZ\
:!#A%AA# .8*
J
"J<;;=;;=RRR,

  alln-'
	C %h9&|[A--h7--h7
  4=L.14
 
 0/K LLNKKM!	 	
	 
  !
s   /A	H>r   r   )
r   r   r   rr   r   boolr   r+   r   r   r   s   @r5   rc  rc    s    
 	

 

 YY Y 	Y
 Y Y Y Y Y #YY Y Y Y Y Y  !Y" #Y Yr7   rc  )FN)r   r   r   r   r   r   )-typingr   r   r   rI   rM   torch._prims_commonr   torch.utils._ordered_setr   irr
   r   r   r   r   r   r   r   r   r   r   r   r   utilsr   r   virtualizedr   r+   r  re   rk   rm   r   r   r   r   r   r  r!  r0  r7  rR  rc  r   r7   r5   <module>r     s   & &   > /    ;  *.YBYB YB 	YB
 #YYB IYB 3iYB YB YB T#Y'YBx)B)B )B 	)BXC
( C
LU
) U
pa 0 a HP
 1 P
fe
* e
P`90 `9F	5
' 5
p<# <~@$ @FR
, R
jY
!2 Y
xi& ir7   