from typing import cast, List, Optional, Union

import torch
from torch import Tensor

from .optimizer import (
    _default_to_fused_or_foreach,
    _device_dtype_check_for_fused,
    _differentiable_doc,
    _foreach_doc,
    _get_scalar_dtype,
    _get_value,
    _maximize_doc,
    _use_grad_for_differentiable,
    _view_as_real,
    Optimizer,
    ParamsT,
)

__all__ = ["Adagrad", "adagrad"]


class Adagrad(Optimizer):
    def __init__(
        self,
        params: ParamsT,
        lr: Union[float, Tensor] = 1e-2,
        lr_decay: float = 0,
        weight_decay: float = 0,
        initial_accumulator_value: float = 0,
        eps: float = 1e-10,
        foreach: Optional[bool] = None,
        *,
        maximize: bool = False,
        differentiable: bool = False,
        fused: Optional[bool] = None,
    ):
        if isinstance(lr, Tensor) and lr.numel() != 1:
            raise ValueError("Tensor lr must be 1-element")
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= lr_decay:
            raise ValueError(f"Invalid lr_decay value: {lr_decay}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        if not 0.0 <= initial_accumulator_value:
            raise ValueError(
                f"Invalid initial_accumulator_value value: {initial_accumulator_value}"
            )
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")

        defaults = dict(
            lr=lr,
            lr_decay=lr_decay,
            eps=eps,
            weight_decay=weight_decay,
            initial_accumulator_value=initial_accumulator_value,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
            fused=fused,
        )
        super().__init__(params, defaults)

        if fused:
            if differentiable:
                raise RuntimeError("`fused` does not support `differentiable`")
            if foreach:
                raise RuntimeError("`fused` and `foreach` cannot be `True` together.")
            self._need_device_dtype_check_for_fused = True

        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["step"] = (
                    torch.zeros(
                        (),
                        dtype=_get_scalar_dtype(is_fused=fused),
                        device=p.device,
                    )
                    if group["fused"]
                    else torch.tensor(0.0, dtype=_get_scalar_dtype())
                )
                init_value = (
                    complex(initial_accumulator_value, initial_accumulator_value)
                    if torch.is_complex(p)
                    else initial_accumulator_value
                )
                state["sum"] = torch.full_like(
                    p, init_value, memory_format=torch.preserve_format
                )

    def __setstate__(self, state):
        super().__setstate__(state)
        fused = None
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
            fused = group.setdefault("fused", None)

        state_values = list(self.state.values())
        step_is_tensor = (len(state_values) != 0) and torch.is_tensor(
            state_values[0]["step"]
        )
        if not step_is_tensor:
            for s in state_values:
                s["step"] = torch.tensor(
                    float(s["step"]), dtype=_get_scalar_dtype(is_fused=fused)
                )

    def share_memory(self):
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["sum"].share_memory_()

    def _init_group(self, group, params_with_grad, grads, state_sums, state_steps):
        has_sparse_grad, has_complex = False, False
        for p in group["params"]:
            if p.grad is not None:
                if group["fused"] and getattr(
                    self, "_need_device_dtype_check_for_fused", False
                ):
                    _device_dtype_check_for_fused(p, cuda_unsupported=True)
                    self._need_device_dtype_check_for_fused = False
                has_sparse_grad |= p.grad.is_sparse
                has_complex |= torch.is_complex(p)
                params_with_grad.append(p)
                grads.append(p.grad)
                state = self.state[p]
                state_sums.append(state["sum"])
                state_steps.append(state["step"])
        return has_sparse_grad, has_complex

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None

        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad: List[Tensor] = []
            grads: List[Tensor] = []
            state_sums: List[Tensor] = []
            state_steps: List[Tensor] = []

            has_sparse_grad, has_complex = self._init_group(
                group, params_with_grad, grads, state_sums, state_steps
            )

            adagrad(
                params_with_grad,
                grads,
                state_sums,
                state_steps,
                lr=group["lr"],
                weight_decay=group["weight_decay"],
                lr_decay=group["lr_decay"],
                eps=group["eps"],
                has_sparse_grad=has_sparse_grad,
                foreach=group["foreach"],
                maximize=group["maximize"],
                differentiable=group["differentiable"],
                has_complex=has_complex,
                fused=group["fused"],
                grad_scale=getattr(self, "grad_scale", None),
                found_inf=getattr(self, "found_inf", None),
            )

        return loss

Adagrad.__doc__ = (
    r"""Implements Adagrad algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
    """
    + rf"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}
        fused (bool, optional): whether the fused implementation (CPU only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementation does not
            support sparse or complex gradients.
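
    Example (an illustrative sketch; ``model``, ``loss_fn``, ``input`` and
    ``target`` are hypothetical stand-ins, not part of this module):
        >>> # xdoctest: +SKIP
        >>> optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()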
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html

    """
)


def adagrad(
    params: List[Tensor],
    grads: List[Tensor],
    state_sums: List[Tensor],
    state_steps: List[Tensor],
    fused: Optional[bool] = None,
    grad_scale: Optional[Tensor] = None,
    found_inf: Optional[Tensor] = None,
    # These are passed as kwargs with defaults (rather than keyword-only args)
    # because the functional API is compiled by torch/distributed/optim and
    # TorchScript does not support keyword-only arguments with defaults.
    has_sparse_grad: bool = False,
    foreach: Optional[bool] = None,
    differentiable: bool = False,
    has_complex: bool = False,
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    maximize: bool,
):
    r"""Functional API that performs Adagrad algorithm computation.

    See :class:`~torch.optim.Adagrad` for details.
    """
    if not all(isinstance(t, torch.Tensor) for t in state_steps):
        raise RuntimeError(
            "API has changed, `state_steps` argument must contain a list of singleton tensors"
        )

    # Respect it when the user explicitly passes False/True for foreach or
    # fused; only pick a default when neither has been user-specified.
    if fused is None and foreach is None:
        _, foreach = _default_to_fused_or_foreach(
            params, differentiable, use_fused=False
        )
    if fused is None:
        fused = False
    if foreach is None:
        foreach = False

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")
    if fused and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with fused optimizers")

    if fused and not torch.jit.is_scripting():
        func = _fused_adagrad
    elif foreach and not torch.jit.is_scripting():
        func = _multi_tensor_adagrad
    else:
        func = _single_tensor_adagrad

    func(
        params,
        grads,
        state_sums,
        state_steps,
        lr=lr,
        weight_decay=weight_decay,
        lr_decay=lr_decay,
        eps=eps,
        has_sparse_grad=has_sparse_grad,
        maximize=maximize,
        differentiable=differentiable,
        has_complex=has_complex,
        grad_scale=grad_scale,
        found_inf=found_inf,
    )


def _make_sparse(grad, grad_indices, values):
    size = grad.size()
    return torch.sparse_coo_tensor(grad_indices, values, size)
      B   ||J t        | |||      D ]  \  }}}}|dz  }t        |      }|s|n| }|dk7  r*|j                  rt        d      |j	                  ||      }|d|dz
  |z  z   z  }|j                  r|j                         }|j                         }|j                         }|j                  t        |||j                  d                   |j                  |      }|j                         j                         j                  |	      }|j                  t        ||||z        |        &t        j                  |      }|r?t        j                  |      }t        j                  |      }t        j                  |      }|j!                  ||d       |r|j#                         |	z   }n|j#                         j                  |	      }|j%                  |||        |st        j&                  |      }t        j&                  |      }
 y )Nr   r   z;weight_decay option is not compatible with sparse gradientsalpha   value)zipr   rV   r1   addcoalesce_indices_valuesadd_r~   powsparse_masksqrt_r5   r8   view_as_realaddcmul_sqrtaddcdiv_view_as_complex)r   rY   rZ   r[   r`   ra   r   r   r   r   r\   r   r   r]   paramrT   	state_sumstep_tr(   clrr}   grad_valuesstd
std_valuesr8   s                            rB   rw   rw   A  s   " )"333*-feZ*U (=&tY!&!#t$1~~"Q  88E86DAX--.>>==?D==?L,,.KNN<lKOOA<NOP''-C,,.33C8JJJT<z1IJSVRV   ))%0J))$/!..y9	**51tT3nn&,nn&++C0NN4SDN1--e4!11)<	Q(=rC   c                   |rJ d       ||J t        |       dk(  ry t        j                  | |||g      }|j                         D ]  \  \  }}}}}t	        t
        t           |      }t	        t
        t           |      }t	        t
        t           |      }t	        t
        t           |      }|
xr t        d |D              }|rt        ||||||||	d|||||       |rt        |||       |rt        j                  |      }t        j                  j                         s=|d   j                  r.t        j                  |t        j                   dd      d	       nt        j                  |d
       |dk7  r3|rt        j                  |||	       nt        j"                  |||	      }|D cg c]  }| d
t%        |      d
z
  |z  z   z   }}t        j&                  |||d
       t        j(                  |      }t        j                  ||	       |dk7  s|rt        j*                  ||       |}nt        j,                  ||      }t        j.                  |||       ! y c c}w )Nz#_foreach ops don't support autogradr   c              3   4   K   | ]  }|j                     y wre   )rV   )rm   rT   s     rB   ro   z(_multi_tensor_adagrad.<locals>.<genexpr>  s      9
#DNN9
s   Trq   g      ?cpu)r&   r   r   r   )rI   r   "_group_tensors_by_device_and_dtyperH   r   r   r   anyrw   r   r5   _foreach_neg_utilsis_compilingis_cpu_foreach_add_r7   _foreach_addr   _foreach_addcmul__foreach_sqrt_foreach_mul__foreach_mul_foreach_addcdiv_)r   rY   rZ   r[   r`   ra   r   r   r   r   r\   r   r   r]   grouped_tensorlistsdevice_params_device_grads_device_state_sums_device_state_steps_rx   device_paramsdevice_gradsdevice_state_sumsdevice_state_stepsdevice_has_sparse_gradr(   	minus_clrr   	numerators                                rB   rv   rv   ~  s   " DDD)"333 6{a#FF	
K0  &&(M? 		 	T&\>:DL-8 f/AB!$v,0CD!0 "
S 9
'39
 6
 ""!")! $!-'%#   -7HI --l;L ||((*/A!/D/K/K"ELLU$C3  2A61##L-|T$11 -| 
 GY
>BRC1
4(1,889
	 
 	 1<UVW!!"34C%1i8$I**<CIy#>[M?z
s   5I+returnc                   | sy |
s|rt        d      |rt        d      ||j                  |ind }||j                  |ind }t        j                  | |||g      }|j	                         D ]  \  \  }}\  \  }}}}}t        t        t           |      }t        t        t           |      }t        t        t           |      }t        t        t           |      }d\  }}|!|||vr|j                  |d      ||<   ||   }|!|||vr|j                  |d      ||<   ||   }t        j                  |d       t        j                  ||||||||	|||       |t        j                  ||gt        |      z          y )Nz5`fused` does not support sparse grad or complex paramz<adagrad with fused=True does not support differentiable=True)NNT)non_blockingr   )r   r   r   r   r   r`   ra   )r1   r&   r   r   itemsr   r   r   tor5   r   _fused_adagrad__foreach_sub_rI   )r   rY   rZ   r[   r`   ra   r   r   r   r   r\   r   r   r]   grad_scale_dictfound_inf_dictgrouped_tensorsr&   rx   r   r   r   r   r   r   r   r   device_grad_scaledevice_found_infs                                rB   ru   ru     s   " +RSSJ
 	

 ,6+A		J't  7@6Ki&&	2QUNBB	
K0O 
			 ( 	 	
	T&\>:DL-8 f/AB!$v,0CD.8++!o&A_,*4--T-*R' / 7 ^%?.)2f4)Pv&-f5.2%(&	
 '"%5$6=O9P$PM(rC   )NNNFNFF)typingr   r   r   r   r5   r   	optimizerr	   r
   r   r   r   r   r   r   r   r   r   __all__r   __doc__ri   rK   r   r~   rw   rv   ru   r"   rC   rB   <module>r      s   . .      i
 bi bL4	 
 		 		 5/ r !#'"& "" GLG<G VG f	G
 D>G  G G G d^G G G 	G  !G" #G$ 
%G& 'GT?
:=L:=<:= V:= f	:=
  := := 	:= := := 
:= := := := :=zh?Lh?<h? Vh? f	h?
  h? h? 	h? h? h? 
h? h? h? h? h?VKLK<K VK f	K
  K K 	K K K 
K K K K K  
!KrC   