
    Ǆg?              "          d dl mZmZmZmZmZ d dlZd dlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddgZ G d de      Zd	e d
e d
e d
e	 d	e_        dee   dee   dee   dee   dee   dee   dededededededededefdZdee   dee   dee   dee   dee   dee   dededededededededefdZ ee      	 	 	 	 	 d dee   dee   dee   dee   dee   dee   dee   dededededededededef d       Zy)!    )castListOptionalTupleUnionN)Tensor   )_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported_foreach_doc!_get_capturable_supported_devices_get_scalar_dtype
_get_value_maximize_doc_use_grad_for_differentiable_view_as_real	OptimizerParamsTASGDasgdc                        e Zd Z	 	 	 	 	 	 	 	 	 ddedeeef   dededededee   ded	ed
ef fdZ	 fdZ
d Zedd       Z xZS )r   paramslrlambdalphat0weight_decayforeachmaximizedifferentiable
capturablec                     t        |t              r|j                         dk7  rt        d      d|k  st        d|       d|k  st        d|       t	        ||||||||	|
	      }t
        |   ||       y )Nr	   zTensor lr must be 1-elementg        zInvalid learning rate: zInvalid weight_decay value: )	r   r   r   r   r   r    r!   r"   r#   )
isinstancer   numel
ValueErrordictsuper__init__)selfr   r   r   r   r   r   r    r!   r"   r#   defaults	__class__s               X/home/mcse/projects/flask_80/flask-venv/lib/python3.12/site-packages/torch/optim/asgd.pyr*   zASGD.__init__   s     b&!bhhjAo:;;by6rd;<<l";L>JKK%)!

 	*    c                    t         |   |       | j                  D ]f  }|j                  dd        |j                  dd       |j                  dd       |j                  dd       |d   D ]  }| j                  j                  |g       }t        |      dk7  s/t        j                  |d         s;t        |d         }t        j                  |t               |j                  	      |d<   t        j                  |d
         s0t        j                  |d
   t               |j                  	      |d
<   t        j                  |d         rt        j                  |d   t               |j                  	      |d<    i y )Nr    r!   Fr"   r#   r   r   step)dtypedeviceetamu)r)   __setstate__param_groups
setdefaultstategetlentorch	is_tensorfloattensorr   r3   )r+   r9   grouppp_statestep_valr-   s         r.   r6   zASGD.__setstate__>   sF   U#&& 	EY-Z/-u5\518_ **..B/w<1$ ??76?;#(#9*/,,$,=,?+ !??75>:).#EN2C2Eahh* !??74=9(-#DM1B1DQXX)	r/   c                 |   d}|d   D ]  }	|	j                   |t        j                  |	      z  }|j                  |	       |	j                   j                  rt        d      |j                  |	j                          | j                  |	   }
t        |
      dk(  rt        j                  d|	j                  t                     |
d<   t        j                  |d   |	j                  t                     j                         j                         |
d	<   t        j                  d|	j                  t                     |
d
<   t        j                  |	t        j                         |
d<   |j                  |
d
          |j                  |
d          |j                  |
d	          |j                  |
d           |S )NFr   z&ASGD does not support sparse gradientsr    )r3   r2   r1   r   r4   r5   )memory_formatax)gradr<   
is_complexappend	is_sparseRuntimeErrorr9   r;   zerosr3   r   	as_tensorclonedetachones
zeros_likepreserve_format)r+   r@   params_with_gradgradsmusaxsetasstate_stepshas_complexrA   r9   s              r.   _init_groupzASGD._init_groupV   st   x 	2Avv!u//22 ''*66##&'OPPQVV$

1u:?$)KK1883D3F%E&M !$K@Q@S  %L #(**1883D3F#E$K #("2"2)>)>#E$K 

5;'

5;'E%L)""5=1?	2@ r/   c                 b   | j                          d}|$t        j                         5   |       }ddd       | j                  D ][  }g }g }g }g }g }g }	| j	                  |||||||	      }
t        ||||||	|d   |d   |d   |d   |d   |d   |d   |d	   |d
   |
       ] |S # 1 sw Y   uxY w)zPerform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r   r   r    r!   r"   r#   )
r   r   r   r   r   r    r!   r"   r#   rZ   ) _cuda_graph_capture_health_checkr<   enable_gradr7   r[   r   )r+   closurelossr@   rT   rU   rV   rW   rX   rY   rZ   s              r.   r1   z	ASGD.stepz   s    	--/""$ !y! && 	E-/"$E "C "C!#D(*K**'S$K  Gn;;Gn">2i(z*$%56 .'!	> E! !s   B%%B.)	g{Gz?g-C6?g      ?g    .Ar   NFFFN)__name__
__module____qualname__r   r   r>   r   r   boolr*   r6   r[   r   r1   __classcell__)r-   s   @r.   r   r      s     $("&$ ++ %- + 	+
 + + + $+ + + +B0"H "- "-r/   ap  Implements Averaged Stochastic Gradient Descent.

    It has been proposed in `Acceleration of stochastic approximation by
    averaging`_.

    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lambd (float, optional): decay term (default: 1e-4)
        alpha (float, optional): power for eta update (default: 0.75)
        t0 (float, optional): point at which to start averaging (default: 1e6)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        z	
        zx

    .. _Acceleration of stochastic approximation by averaging:
        https://dl.acm.org/citation.cfm?id=131098

    r   rU   rW   rV   rX   rY   r   r   r   r   r   r!   r"   r#   rZ   c       	   
         t        |       D ]  \  }}||   }|s|n| }||   }||   }||   }||   }t        j                  j                         s|rt	               }|j
                  j                  |j
                  j                  cxk(  r3|j
                  j                  cxk(  r|j
                  j                  k(  rn n|j
                  j                  |v sJ d| d       t        j                  |      r?t        j                  |      }t        j                  |      }t        j                  |      }|dz  }|
dk7  r|j                  ||
      }|r,|j                  d||z  z
         |j                  ||d       n6t        |      }|j                  d||z  z
         |j                  ||        |s|j                         dk7  r0|j                  |j                  |      j                  |             n|j!                  |       |r`|j!                  |d||z  |z  z   |	z  z         |j!                  dt        j"                  ||z
  t        j$                  |            z         et        |      }t        j&                  |d||z  |z  z   |	z  z        }|j!                  |       t        j&                  dt)        d||z
        z        }|j!                  |        y )NUIf capturable=True, params, mus, etas, and state_steps must be on supported devices: .r	   r   r   value)	enumerater<   _utilsis_compilingr   r3   typerI   view_as_realaddmul_addcmul_r   add_itemsubcopy_maximum	ones_likerN   max)r   rU   rW   rV   rX   rY   r   r   r   r   r   r!   r"   r#   rZ   iparamrH   r5   rG   r4   step_tcapturable_supported_devices	eta_valuer1   new_etanew_mus                              r.   _single_tensor_asgdr      s   $ f% 75Qx#t$VV1gQ ||((*z+L+N(!!99>>&::??& ==%%& LL%%)EE	))E(FaI	F E"%%d+D&&u-E##B'B 	!188E86DJJq53;'NN4BN/"3IJJq59,,-JJtI:J. aGGEIIbM&&r*+HHUOIIbQf!44>?@HHQv{EOOF4KLLMf%DoobQd1B-Bu,L&MNGIIg__QQr	):%:;FHHVo7r/   c       	         F  " t        |       dk(  ry |rJ d       t        j                  j                         s9|r7t	        d      "t        "fdt        | |||      D              sJ d" d       t        j                  | |||||g      }|j                         D ]x  \  \  }}\  \  }}}}}}}t        t        t           |      }t        t        t           |      }t        t        t           |      }t        t        t           |      }t        t        t           |      }t        t        t           |      }|rt        |||       |rt        j                  |      }t        j                  j                         s=|d   j                  r.t        j                   |t        j"                  dd	
      d       nt        j                   |d       |
dk7  rN|rt        j                   |||
       |}nt        j$                  |||
      }t        j                   |||       nt        j$                  |||      }t        j&                  |||d       ~t        j(                  ||      }t        j&                  |||       ~|rt        j(                  ||      }t        j*                  |d       t        j,                  |       t        j.                  ||       ~t        j0                  ||      } t        j2                  | |       t        j                   | d       t        j4                  | |	       t        j,                  |        t        j2                  | |       t        j.                  ||        |D !cg c](  }!t        j6                  |d||z  |!z  z   |	z  z  |
      * } }!|D !cg c]2  }!t        j6                  dt9        dt;        |!      |z
        z  |
      4 }}!t        j.                  ||        t        j.                  ||       { y c c}!w c c}!w )Nr   z#_foreach ops don't support autogradF)supports_xlac              3   ,  K   | ]  \  }}}}|j                   j                  |j                   j                  cxk(  xr5 |j                   j                  cxk(  xr |j                   j                  k(  nc xr |j                   j                  v   y wra   )r3   rq   ).0rA   r5   r4   r1   r   s        r.   	<genexpr>z%_multi_tensor_asgd.<locals>.<genexpr>+  so      
 !2sD HHMMRYY^^RszzR$++BRBRR >!==>
s   BBrh   ri   g      ?cpu)r3   rj   r	   rk   rl   )r;   r<   ro   rp   r   allzipr   "_group_tensors_by_device_and_dtypeitemsr   r   r   r   _foreach_negis_cpu_foreach_add_r?   _foreach_add_foreach_addcmul__foreach_sub_foreach_maximum__foreach_reciprocal__foreach_copy__foreach_mul_foreach_mul__foreach_pow_rN   r|   r   )#r   rU   rW   rV   rX   rY   r   r   r   r   r   r!   r"   r#   rZ   grouped_tensorsr3   _grouped_params_grouped_grads_grouped_axs_grouped_mus_grouped_etas_grouped_state_steps_grouped_paramsgrouped_gradsgrouped_axsgrouped_musgrouped_etasgrouped_state_stepsintermediatenew_musnew_etasr1   r   s#                                     @r.   _multi_tensor_asgdr     s   $ 6{aDDD <<$$&:'H(
$  
 %(T;$G
 
 	C cc  cA  AB  C		C 
  BB	S$4O 
			 c7 
	 
	
 	d6lO<T&\>:4<64<6DL-8"4<1EF.-E!..}=M ||((*/B1/E/L/L#U\\#e%DC  3Q7 1##M>V,$11!>  nEJ --~UL 	lRTU )).+F\;G (()<bAG##GS1&&w/  g6 ))*=uEH"-!,%0&&x0"-  x8 0 q52:+<'<&F GPVWH  0 C:d+;b+@$A A&QG    x8  g6Gc7ts   =-P07P)single_tensor_fnr    c                   |t        | |d      \  }}|r)t        j                  j                         rt	        d      |r%t        j                  j                         st
        }nt        } || |||||||||||||	|
       y)znFunctional API that performs asgd algorithm computation.

    See :class:`~torch.optim.ASGD` for details.
    NF)	use_fusedz6torch.jit.script not supported with foreach optimizers)	r   r   r   r   r   r!   r"   r#   rZ   )r   r<   jitis_scriptingrL   r   r   )r   rU   rW   rV   rX   rY   r    r!   r"   r#   rZ   r   r   r   r   r   r   funcs                     r.   r   r     s    4 1Ne

7 599))+STTuyy--/!"!%r/   )NFFFF)typingr   r   r   r   r   r<   r   	optimizerr
   r   r   r   r   r   r   r   r   r   r   r   r   __all__r   __doc__r>   re   r   r   r   rE   r/   r.   <module>r      s   6 5     " 6
L9 L^	 
 		 		 		 !0ILI<I 
fI 
f	I
 v,I fI I 	I 	I I I I I I  !IXH7LH7<H7 
fH7 
f	H7
 v,H7 fH7 H7 	H7 	H7 H7 H7 H7 H7 H7  !H7V  1DE # 6L6<6 
f6 
f	6
 v,6 f6 d^6 6 6 6 6 6  	!6" 	#6$ %6& '6 F6r/   