
    ɯwg                         d dl Z d dlmZmZ d dlZd dlmc mZ d dl	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZmZmZmZmZmZ dd
lmZmZmZ ddlmZmZm Z  ddl!m"Z"m#Z# d Z$y)    N)ListOptional)mm_args   )ir)CppPackedGemmTemplatecreate_epilogue_with_attr)	TensorBox)addadd_needs_realized_inputsatenpermuteregister_loweringto_dtypeview)autotune_select_algorithmChoiceCallerExternKernelChoice)use_aten_gemm_kernelsuse_cpp_packed_gemm_templateuse_max_autotune)opsVc            !         t         j                  j                  rddlm t        t         j                  j                  j                  ddj                  j                        t        t         j                  j                  j                  j                  ddj                  j                        t        t         j                  j                  j                  ddj                  j                        t        t         j                  j                  j                  j                  ddj                   j                        t         j                  j                  j"                  t         j                  j                  j$                  t         j                  j                  j&                  t         j                  j                  j                  t(        j*                  j,                  t         j                  j                  j.                  g} t1        t         j                  j                  j"                        dt2        dt2        d	t2        ffd
       }t1        t         j                  j                  j"                  j                        dt2        dt2        dt2        d	t2        ffd       }t1        t         j                  j                  j$                  j                        dt2        dt2        dt2        d	t2        ffd       }t1        t         j                  j                  j                        	 d3dt2        dt2        dt2        ffd       }t1        t         j                  j                  j                  j                        	 d3dt2        dt2        dt2        dt2        ffd       }t1        t         j                  j                  j&                        dt2        dt2        d	t2        ffd       }t1        t(        j*                  j,                        dt2        dt2        dt2        dt2        dt2        dt2        dt2        dt4        dt6        t8           dt8        dt8        dt8        dt4        d t4        d!t4        d"t4        f fd#       }t1        t         j                  j                  j.                  d $      dt2        d%t2        d&t2        d't2        d	t2        f
fd(       }t1        t         j                  j                  j.                  j                  d $      dt2        d)t2        d%t2        d&t2        d't2        d	t2        ffd*       }	t1        t         j                  j                  j                  d $      	 d3dt2        d%t2        d&t2        d't2        d	t2        f
fd+       }
t1        t         j                  j                  j                  j                  d $      t1        t         j                  j                  j                  j:                  d $      	 d3dt2        d%t2        d&t2        d't2        d,t2        d	t2        ffd-              }t         j                  j<                  rt        t         j                  j>                  j@                  d.djB                  j                        | jE                  t         j                  j>                  j@                         t1        t         j                  j>                  j@                        d d/dt2        d0t2        d1t2        dtF        t2           ffd2       }tI        |        y y )4Nr   )	mkldnn_irzmkldnn::_linear_pointwiseF)has_out_variantkernel_creatorzonednn::qlinear_pointwisexweightbiasc
                 r    t        j                  
j                  j                  | |||||||||	
            S N)r   createConvolutionUnary)r   r    r!   paddingstridedilationgroupsattrscalars	algorithmr   s             e/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/torch/_inductor/mkldnn_lowerings.pyconvolution_unaryz5register_onednn_fusion_ops.<locals>.convolution_unaryE   sJ     ##**11     otherc                 x    t        j                  j                  j                  | |||||||||	|
||            S r#   )r   r$   ConvolutionBinaryr   r0   r    r!   r&   r'   r(   r)   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmr   s                r-   convolution_binaryz6register_onednn_fusion_ops.<locals>.convolution_binarya   sS      ##++22 !# r/   c                 x    t        j                  j                  j                  | |||||||||	|
||            S r#   )r   r$   ConvolutionBinaryInplacer3   s                r-   convolution_binary_inplacez>register_onednn_fusion_ops.<locals>.convolution_binary_inplace   sS      ##2299 !# r/   wbc                 ^   | j                         }t        |      dkD  rt        | d|d   g      } |t        j                  j                  |      }g }t               rvt        |ddg      }	t        | |	|      ^ }
}} }	t        || |	      rHfd}t        |d uddk(  rd n|	      }|g d
|d<   t        j                  |||| |gn| ||gfi | t        |      dk(  s
t               rAt              }|d |d<   |j                   j                  || |gn| ||g|fi |       |j!                         t"        j$                  j&                  v sJ dd i}t)        d||| |gn| ||g||      }t        |      dkD  r%t        |g |d d |j                         d         }|S )N   r   r   layoutc                 "    t        |       S )Nr+   r,   r	   )bufr,   r*   r+   s    r-   epilogue_creatorzJregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.epilogue_creator   s    8w)  r/   Tnonehas_biastrans_wrG   )r@   r   r   input_indices)r*   r+   r,   Bc                 X    t         j                  j                  | j                            S r#   r   graph	constantsget_namer   s    r-   <lambda>zBregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.<lambda>       QWW..qzz|< r/   linear_unaryinput_gen_fnsget_sizelenr   r   ExternKernelrealize_inputr   r   r   r   dictr   add_choicesr   appendbindrR   r   rP   rQ   r   )r   r=   r>   r*   r+   r,   rC   x_sizechoicestransposed_w_rG   kwargsrX   resultaten_mkldnn_linear_unarys      ```         r-   rV   z0register_onednn_fusion_ops.<locals>.linear_unary   s    ZZ\F6{QR,-}OO11!4*,G!&q1a&1.5af.U+FA|/<H
 "!"$ $15EUF
 }2;/)55"#)A!Q !	 7|q $9$;4IN9"&F3K1,11"#)A!Q ! ::<177#4#4444<M /)A!Q+F 6{Qf&Ks&KV__5Fr5J&KLMr/   yc                    | j                         }t        |      dkD  rt        | d|d   g      } j                         }t        |      dkD  rt        d|d   g      |t        j                  j                  |      }g }t               rvt        |ddg      }	t        | |	|      ^ }
}} }	t        || |	      rFfd}t        |d ud|      }|g d	ng d
|d<   t        j                  |||| |gn| ||gfi | t        |      dk(  s
t               rAt              }|d |d<   |j                   j                  || |gn| ||g|fi |       |j!                         t"        j$                  j&                  v sJ dd i}t)        d||| |gn| ||g||      }t        |      dkD  r%t        |g |d d |j                         d         }|S )Nr@   rA   r   r   rB   c                      t        |       S )N)r0   r	   )rF   r*   ri   s    r-   rG   zKregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.epilogue_creator   s    8d!LLr/   TrI   )r   r@   r   )   r   r@   r   rL   )r*   rM   c                 X    t         j                  j                  | j                            S r#   rO   rS   s    r-   rT   zCregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.<lambda>  rU   r/   linear_binaryrW   rY   )r   ri   r=   r>   r*   rC   rb   y_sizerc   rd   re   rG   rf   rX   rg   aten_mkldnn_linear_binarys    `  `          r-   rn   z1register_onednn_fusion_ops.<locals>.linear_binary   s$    ZZ\F6{QR,-ZZ\F6{QR,-}OO11!4*,G!&q1a&118|Qv2.FA|Q 0<HM "!"$ $)9F
 <=9i,F?+)55%&YAq	Q1aL !	 7|q $9$;49"&F3K2-22%&YAq	Q1aL ! ::<177#4#4444<M /YAq	Q1aL+F 6{Qf&Ks&KV__5Fr5J&KLMr/   c                 t    t        j                  j                  j                  | |||||||||	|
            S r#   )r   r$   ConvolutionTransposeUnary)r   r    r!   r&   output_paddingr'   r(   r)   r*   r+   r,   r   s              r-   convolution_transpose_unaryz?register_onednn_fusion_ops.<locals>.convolution_transpose_unary%  sM     ##33::" r/   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                     t        j                  t        j                  j                  j                  | |||||||||	|
|||||            S r#   )pytreetree_mapr   r$   MkldnnRnnLayer)r   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   s                   r-   mkldnn_rnn_layerz4register_onednn_fusion_ops.<locals>.mkldnn_rnn_layerC  sc    & ??  ((//!! r/   )type_promotion_kindpacked_weightw_scalew_zpc                     t        j                  j                  j                  | |||||||||	|
||||||            S r#   )r   r$   QConvPointWisePT2E)r   x_scalex_zpr   r   r   r!   r'   r&   r(   r)   o_inv_scaleo_zero_pointoutput_dtyper*   r+   r,   r   s                    r-   qconvolution_unaryz6register_onednn_fusion_ops.<locals>.qconvolution_unaryl  s_    ( ##,,33!  # r/   accumc                 v   |dk(  rq|t         j                  t         j                  fv rO|j                         t         j                  t         j                  fv r|j                         |k7  rt	        ||      }t        j                  j                  j                  | |||||||||	|
|||||||||||            S )Nsum)torchfloat32bfloat16	get_dtyper   r   r$   QConvPointWiseBinaryPT2E)r   r   r   r   accum_scaleaccum_zpr   r   r   r!   r'   r&   r(   r)   r   r   r   r4   alphar6   r7   unary_algorithmmr   s                         r-   qconvolution_binaryz7register_onednn_fusion_ops.<locals>.qconvolution_binary  s    8 u$ U]]ENN$CCOO%%--)HHOO%5 !5##2299!  !$- r/   c                 H  	
 | j                         }t        |      dkD  rt        | d|d   g      } t        t        j
                        sYt              t        k(  sJ t        j                  j                  t        j                  t        j                        d      nj                          t        t        j
                        sYt              t        k(  sJ t        j                  j                  t        j                  t        j                         d      nj                          j                          |j                          |j#                         t        j                   k7  rt        t        j$                  j'                  |      t        j(                        rt        j                  j*                  |j-                            j/                  t        j                         }t        j                  j                  t        j                  |t        j                         |j-                               }d nj#                         g }t1               r=t3        | ||	      ^ }}} }t        t        j$                  j'                        t        j(                        rt        j5                         j6                        dk(  rt        t        j$                  j'                  |      t        j(                        rt        j8                  t        j:                  t        j                  j*                  |j-                                  t        j                  j*                  |j-                                  rt=        || |      rt        j                  j*                  |j-                            j?                         }t        j@                  |j/                  t        j                        d	      }t        j                  j                  ||j-                         d
z         
	fd}| j#                         t        jB                  k(  sJ tE        jF                  ||| ||gn| ||gd u|g dng d       t        |      dk(  s
tI               rLtK        	
      }d |d<   |jM                   jN                  | ||fn| ||f|fi |       |j-                         t        j                  j*                  v sJ d d d d d}tQ        d|| ||gn| ||g||      }t        |      dkD  r%t        |g |d d |j                         d         }|S )Nr@   rA   dtyper   namer   rC   	out_dtyper   dim_BMatrixCompensc                   	
 t         j                  t         j                  t         j                  fv sJ | j	                         j	                         j	                         j	                         
j	                         d j	                         
fd}t        j                  | j                         t         j                  || j                               }dk7  rt        |      }t         j                  k(  rM|j	                         fd}t        j                  |j                         ||j                               }|S t         j                  k(  ryddl
m |j	                         		fd}t        j                  |j                         t        j                  |t              t              	      |j                               }|S )
Nc                     |       }t        j                  |t        j                        }| d   f} d      } d      } |      } |      }t        j                  t        j                  ||      |      }t        j
                  |t        j                  t        j                  t        j                  ||      |      |            }	y |      }
t        j                  t        j                  fv sJ 
t        j                  k(  r$t        j                  |t        j                        }t        j                  ||      }|S )NrA    r   r   r   r   mulsubr   r   )indexinputweight_compens_index_x_scale_x_zp_w_scale_weight_compotemp_biasr!   
bias_dtypebias_loaderinput_loaderw_scale_loaderweight_compens_loaderx_scale_loaderx_zp_loaders            r-   inner_fnz]register_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn9  s'   $0$7E %(LL$FE49"I<0'5b'9H$/OE'56J'KH,ABV,WM#&77 #$)$,!" !)$D $'77 $ #$'GG(+,4,4)* ).%& %2	!"$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwtU';#'Kr/   devicer   r   rangesrH   rE   c                 @     |       }t        j                  |      S r#   r   r   r   r   output_cast_loaderr   s     r-   inner_fn_cast_output_to_bf16zqregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16u      (:5(A'*||E<'H Hr/   r   _create_constantsc                 \    
|       } 	d|z  |t         j                        \  }}t        j                  ||z        |z   } 	ddt         j                        \  }}t        j                  t        j
                  ||      |      }t        j                  |t         j                        S Ng      ?r   r      r   r   r   roundminimummaximumr   uint8r   scale
zero_pointr   	inv_scalevalqminqmaxclampedr   requant_input_loaders            r-   inner_fn_requantzeregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_requant      (<U(C8I$'%K5==9" 5	: '*ii	0A&BZ&O->$%s%--."
d +.++ckk#t6Ld*S'*||GU[['I Ir/   r   r   r   r   r   r   make_loaderr   	Pointwise
get_devicerZ   r
   loweringr   	functoolspartialfloatint)input_bufferr   
output_bufr   r   r   r   r   r   r   r   r   r   r   r,   r*   r!   r   o_scaler   r   r+   r   weight_compensr   r   s        @@@@@@@@@r-   rG   zKregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator(  s   +!MM!NN!KK0     
 (4'?'?'A0>0J0J0L-)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K)( )(V &(\\#/#:#:#<"'--%-#/#8#8#:	&
  6>)B *D'Y*J
 (5>>91;1G1G1I.I *,'1'<'<'>&2)E'1':':'<	*JD  *)9 *U[[8C3=3I3I3K0
J *,'1'<'<'>&2)2):):$4*/./2</@*"
 (2':':'<	*J  *)r/   )r   rl   r   r@         )   r   rl   r   r@   r   r   rJ   rG   rL   )output_scaleoutput_zero_pointr   post_op_namepost_op_argspost_op_algorithmr!   c                 X    t         j                  j                  | j                            S r#   rO   rS   s    r-   rT   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  rU   r/   c                 X    t         j                  j                  | j                            S r#   rO   rS   s    r-   rT   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  rU   r/   c                 X    t         j                  j                  | j                            S r#   rO   rS   s    r-   rT   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  rU   r/   c                 X    t         j                  j                  | j                            S r#   rO   rS   s    r-   rT   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  rU   r/   )rl   r   r   r   qlinear_unaryrW   ))rZ   r[   r   
isinstancer   r   typer   r   rP   add_tensor_constantr   tensorr   realizer   int32r   InputsKernelunwrap_storage_for_inputConstantBufferrQ   rR   tor   r   
get_layoutsizeequal
zeros_liker   to_denser   r   r   r_   r   r^   r`   ra   r   )r   r   r   r   r   r   r!   r   r   r   r*   r+   r,   rC   rb   w_zp_tensorrc   re   W_tensorweight_compens_tensorrG   rf   rX   rg   r   r   aten_mkldnn_qlinear_unarys    `` ` ```````           @@r-   r   z1register_onednn_fusion_ops.<locals>.qlinear_unary  s   " ZZ\F6{QR,-gr||4G}---''55LL>Y 6  !dBLL1DzS(((ww22LLU[[9 3  
 OOLLN~~5;;.:88>!!4
  gg//@CCEKKPww22LLEKK@t}} 3  "&4>>3CJ*,G!/6}V|0,FA} @@F)) DOO-223q8"@@F)) (():):4==?)KL))$--/: 5VQN ww001G1G1IJSSUH,1IIhkk%++6NTU,V)%&WW%@%@-*3358II &A &N
s* s*j ;;=EKK777)55< GT='4H$wdS!%T!1)9< '92 7|q $9$;!(&2!-!%!(&/ <%)F6N2-22< GT='4H$wdS	
 ! !))+qww/@/@@@@<<<<	M /< GT='4@$wdK+F 6{Qf&Ks&KV__5Fr5J&KLMr/   x2c                   	
  | j                         }j                         }t        |      t        |      k(  sJ t        |      dkD  r'|dk(  r"t        | d|d   g      } t        d|d   g      t        t        j
                        sYt              t        k(  sJ t        j                  j                  t        j                  t        j                        d      nj                          t        t        j
                        sYt              t        k(  sJ t        j                  j                  t        j                  t        j                         d      nj                          j                          |j                          |j#                         t        j                   k7  rt        t        j$                  j'                  |      t        j(                        rt        j                  j*                  |j-                            j/                  t        j                         }t        j                  j                  t        j                  |t        j                         |j-                               }|dk(  r
t        j                  t        j0                  fv rPj#                         t        j                  t        j0                  fv r j#                         
k7  r't3        
      nj#                         
k(  sJ d	       j#                          j#                         nd g }t5               r&|dk(  r t7        | ||

      ^ }}} }t        t        j$                  j'                        t        j(                        rt        j9                         j:                        dk(  rt        t        j$                  j'                  |      t        j(                        rst        j<                  t        j>                  t        j                  j*                  |j-                                  t        j                  j*                  |j-                                  rtA        || |      rt        j                  j*                  |j-                            }|jC                         }t        jD                  |j/                  t        j                        d      }t        j                  j                  ||j-                         dz         	
 fd}tG        jH                  ||	| ||gn	| ||gd u|g dng d       t        |      dk(  s
tK               rRtM        	
||||
      }d |d<   |jO                   !jP                  	| ||fn	| ||f|fi |       |j-                         t        j                  j*                  v sJ d d d d}d |d<   tS        d|	| ||gn	| ||g||      }t        |      dkD  r*|dk(  r%t        |g |d d |j                         d         }|S )Nr@   r   rA   r   r   r   r   r   zCdtype of accum for qlinear post op sum should be the same as outputr   r   r   r   c                 (  	
 t         j                  t         j                  t         j                  fv sJ | j	                         j	                         j	                         j	                         j	                         
j	                         d j	                         
f
d}t        j                  | j                         t         j                  || j                               }dk7  rt        |      }t         j                  k(  rM|j	                         fd}t        j                  |j                         ||j                               }|S t         j                  k(  rddl
m |j	                         		fd}t        j                  |j                         t         j                  t        j                  |t              t              	      |j                               }|S )
Nc                   
  |       } |       } d      } d      }t        j                  |t        j                        }| d   f} |      } |      }t        j                  t        j                  ||      |      }t        j
                  |t        j                  t        j                  t        j                  ||      |      |            }
y |      }	t        j                  t        j                  fv sJ t        j                  k(  r$t        j                  |	t        j                        }	t        j                  ||	      }t        j                  t        j                  fv sJ t        j                  k(  r$t        j                  |t        j                        }t        j                  ||      }|S )Nr   rA   r   )r   r   _x2r   r   r   r   _weight_compensr   r   r!   r   r   r   r   r   x2_dtype	x2_loaderr   r   s             r-   r   z^register_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fnS  s   $0$7E"+E"2C'5b'9H$/OE %(LL$FE49"I<0'56J'KH.C 4/O $'77 #$)$,!" !)$D $'77 $ #$'GG(+,4,4)* ).%& %4	!"$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwtU'; $,u~~/N#NN#N'5>>9&)ll3&F#&774#5D#'Kr/   r   rH   rE   c                 @     |       }t        j                  |      S r#   r   r   s     r-   r   zrregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16  r   r/   r   r   c                 \    
|       } 	d|z  |t         j                        \  }}t        j                  ||z        |z   } 	ddt         j                        \  }}t        j                  t        j
                  ||      |      }t        j                  |t         j                        S r   r   r   s            r-   r   zfregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_requant  r   r/   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r!   r   r   r   r   r   r6   r7   r   r   r  r  r   r   s        @@@@@@@@@@r-   rG   zLregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator@  s   +!MM!NN!KK0      (4'?'?'A$&NN$4	0>0J0J0L-)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K5( 5(n &(\\#/#:#:#<"'--%-#/#8#8#:	&
 &/)B * *(5*:	*J (5>>91;1G1G1I.I *,'1'<'<'>&2)E'1':':'<	*JD  *)9 *U[[8C3=3I3I3K0
J *,'1'<'<'>&+kk)2):):$4*/./2</@*"
 (2':':'<	*J  *)r/   )r   rl   r   r@   r   r   r   )   r   rl   r   r@   r   r   r   r   )
r   r   r   other_scaleother_zpbinary_post_opr5   unary_post_opunary_post_op_argsunary_post_op_algorithmr!   c                 X    t         j                  j                  | j                            S r#   rO   rS   s    r-   rT   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  rU   r/   c                 X    t         j                  j                  | j                            S r#   rO   rS   s    r-   rT   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  rU   r/   c                 X    t         j                  j                  | j                            S r#   rO   rS   s    r-   rT   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  rU   r/   )rl   r   r   c                 X    t         j                  j                  | j                            S r#   rO   rS   s    r-   rT   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  s    QWW->->qzz|-L r/   r  qlinear_binaryrW   )*rZ   r[   r   r   r   r   r   r   r   rP   r   r   r   r   r  r   r  r   r  r  r  rQ   rR   r  r   r   r   r   r  r  r	  r
  r   r  r   r   r_   r   r^   r`   ra   r   )"r   r   r   r   r   r   r  r!   r   r   r   x2_scalex2_zpr4   r   r6   r7   r   rC   rb   x2_sizer  rc   re   r  r  rG   rf   rX   rg   r   r   r  aten_mkldnn_qlinear_binarys"    `` ` `````    ```            @@@r-   r%  z2register_onednn_fusion_ops.<locals>.qlinear_binary  s   6 ZZ\FkkmGv;#g,...6{Q;%#7R,-"r72;/0gr||4G}---''55LL>Y 6  !dBLL1DzS(((ww22LLU[[9 3  
 OOLLN~~5;;.:88>!!4  gg//@CCEKKPww22LLEKK@t}} 3  e#MMNN$  lln(GG||~5
 &b,7 ,6]\]6||~H-1-=)4J*,G "{e';3:}b<40FA}b @@F)) DOO-223q8"@@F)) (():):4==?)KL))$--/: 5VQN ww001G1G1IJH'002H,1IIhkk%++6NTU,V)%&WW%@%@-*3358II &A &N
D* D* D*L *55< GT='4L$wbRVW!%T!1)9  < '<5 7|q $9$;!(&2!- ("#.!&",'4,< <%)F6N3.33< GT='4L$wbRVW	
 ! !))+qww/@/@@@@<<<M
 #La . < GT='4D$wb$O+F 6{Q;%#7f&Ks&KV__5Fr5J&KLMr/   zmkl::_mkl_linearrB   packed_worig_wc                8   g }t               rMt        |ddg      }t        | ||      ^ }}} }t        || |      rt	        j
                  ||| ||gdddg       t        |      dk(  s
t               r'|j                  j                  | ||f|d |             |j                         t        j                  j                  v sJ |j                         t        j                  j                  v sJ d d	 d
}	t        d|| ||g||	      }
|t        |
|      }
|
S )Nr   r   rB   Tr@   )rK   rL   )rM   
batch_sizec                 X    t         j                  j                  | j                            S r#   rO   rS   s    r-   rT   zGregister_onednn_fusion_ops.<locals>.mkl_packed_linear.<locals>.<lambda>/      !2!21::<!@ r/   c                 X    t         j                  j                  | j                            S r#   rO   rS   s    r-   rT   zGregister_onednn_fusion_ops.<locals>.mkl_packed_linear.<locals>.<lambda>0  r/  r/   )r   r@   packed_linearrW   )r   r   r   r   r   r_   r[   r   r`   ra   rR   r   rP   rQ   r   r   )r   r*  r+  r>   r-  rC   rc   rd   re   rX   rg   aten_mkl_linears              r-   mkl_packed_linearz5register_onednn_fusion_ops.<locals>.mkl_packed_linear
  s>    /1#%#*6Aq6#:L29<3/Q< 4FA|L-99#"&1$(+,a& w<1$(=(?NN',,&16Tj -   ((*agg.?.????(AGG,=,==== A@! %>#&)"/% = ^Fr/   r#   )%r   _C_has_mkldnn r   r   r   mkldnn_linear_pointwiseLinearUnaryr$   binaryLinearBinaryonednnqlinear_pointwiseQLinearPointwisePT2EQLinearPointwiseBinaryPT2E_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwiser   r   defaultqconv2d_pointwiser   r   boolr   r   binary_tensorhas_mklmkl_mkl_linearMKLPackedLinearr`   r   r   )cpu_needs_realized_inputsr.   r9   r<   rV   rn   rt   r   r   r   r   r%  r3  r2  rp   rh   r)  r  r   s                @@@@@@r-   register_onednn_fusion_opsrL      s   xx#5II..'!$0077	$
  %7II..55'!$1188	%
! %7II..'!$99@@	%
! &8II..55'!$??FF	&
" II33II44II==II..!!))II..%
! 
599++BB	C			 	 
D	6 
599++BBII	J			 	 		 
K	B 
599++CCJJ	K			 	 		 
L	B 
599++==	> ?	?	?	 ?	 
??	B 
599++==DD	EQU;	;	&;	+4;	9B;	 
F;	z 
599++LL	M			 	 
N	: 
40088	9&	&	&	 &	 	&	
 &	 &	 &	 &	 c&	 &	 &	 &	 &	  &	 &	  !&	 
:&	P 
599++==SW	X'	'	 %	'	
 '	 '	 '	 
Y'	R 
II..554

<	<	 	<	 %<	 <	 <	 <	

<	| 
599++==SW	X x	x	 %	x	
 x	 x	 x	 
Yx	t 
II..554

 
II..<<RV

, 'g	g	 %	g	
 g	 g	 g	 g	



g	R	 880		))" %(88??	O &,,UYY]]-F-FGuyy}}889 00#0 "0 I&	0 :0d 	"";<r/   )%r   typingr   r   r   torch.utils._pytreeutils_pytreer    torch._inductor.kernel.mm_commonr   r6  r   codegen.cpp_gemm_templater   codegen.cpp_utilsr
   r   r   r   r   r   r   r   r   r   select_algorithmr   r   r   r   r   r   virtualizedr   r   rL  r   r/   r-   <module>rV     sP     !  $ $ 4  < 8    
 Y X _r/   