
"""Base optimizer."""
import functools
import warnings
from collections import defaultdict, OrderedDict
from copy import deepcopy
from itertools import chain
from typing import (
    Any,
    Callable,
    cast,
    DefaultDict,
    Dict,
    Hashable,
    Iterable,
    List,
    Optional,
    overload,
    Set,
    Tuple,
    TypeVar,
    Union,
)
from typing_extensions import ParamSpec, Self, TypeAlias

import torch
import torch.utils.hooks as hooks
from torch._utils import is_compiling
from torch.utils._foreach_utils import (
    _get_foreach_kernels_supported_devices,
    _get_fused_kernels_supported_devices,
    _group_tensors_by_device_and_dtype,
    Indices,
    TensorListList,
)
from torch.utils.hooks import RemovableHandle

Args: TypeAlias = Tuple[Any, ...]
Kwargs: TypeAlias = Dict[str, Any]
StateDict: TypeAlias = Dict[str, Any]
DeviceDict = Dict[Optional[torch.device], torch.Tensor]

GlobalOptimizerPreHook: TypeAlias = Callable[
    ["Optimizer", Args, Kwargs], Optional[Tuple[Args, Kwargs]]
]
GlobalOptimizerPostHook: TypeAlias = Callable[["Optimizer", Args, Kwargs], None]

__all__ = [
    "Optimizer",
    "register_optimizer_step_pre_hook",
    "register_optimizer_step_post_hook",
]
_global_optimizer_pre_hooks: Dict[int, GlobalOptimizerPreHook] = OrderedDict()
_global_optimizer_post_hooks: Dict[int, GlobalOptimizerPostHook] = OrderedDict()
_foreach_supported_types = [torch.Tensor, torch.nn.parameter.Parameter]


class _RequiredParameter:
    """Singleton class representing a required parameter for an Optimizer."""

    def __repr__(self) -> str:
        return "<required parameter>"


required = _RequiredParameter()


def _use_grad_for_differentiable(func):
    def _use_grad(self, *args, **kwargs):
        import torch._dynamo

        prev_grad = torch.is_grad_enabled()
        try:
            # Graph breaks are needed so that aot respects the grad-mode change.
            torch.set_grad_enabled(self.defaults["differentiable"])
            torch._dynamo.graph_break()
            ret = func(self, *args, **kwargs)
        finally:
            torch._dynamo.graph_break()
            torch.set_grad_enabled(prev_grad)
        return ret

    functools.update_wrapper(_use_grad, func)
    return _use_grad


def _get_value(x):
    # item() is significantly faster than a cpu tensor in eager mode
    if not torch.jit.is_scripting() and is_compiling():
        return x
    else:
        return x.item() if isinstance(x, torch.Tensor) else x


def _stack_if_compiling(x):
    if not torch.jit.is_scripting() and is_compiling():
        return torch.stack(x)
    else:
        return x


def _disable_dynamo_if_unsupported(single_tensor_fn=None):
    # Workaround for torchscript BC: the single-tensor fallback has to be
    # resolvable from the global environment where the closure below is created.
    if single_tensor_fn:
        globals()[single_tensor_fn.__name__] = single_tensor_fn

    def wrapper(func):
        import inspect

        disabled_func = torch._disable_dynamo(func)
        ps = inspect.signature(func).parameters
        has_state_steps = True
        try:
            state_steps_ind = list(ps.keys()).index("state_steps")
        except ValueError:
            has_state_steps = False

        # Having state steps on CUDA as the value arg is not supported in eager
        # unless capturable=True, so fall back to the dynamo-disabled path then.
        @functools.wraps(func)
        def maybe_fallback(*args, **kwargs):
            if is_compiling() and (
                not kwargs.get("capturable", False)
                and has_state_steps
                and (args[state_steps_ind] and args[state_steps_ind][0].is_cuda)
                or (
                    "state_steps" in kwargs
                    and kwargs["state_steps"]
                    and kwargs["state_steps"][0].is_cuda
                )
            ):
                return disabled_func(*args, **kwargs)
            else:
                return func(*args, **kwargs)

        return maybe_fallback

    return wrapper


def _default_to_fused_or_foreach(
    params: List[torch.Tensor], differentiable: bool, use_fused: bool = False
) -> Tuple[bool, bool]:
    if torch.jit.is_scripting() or differentiable:
        return False, False

    fused_supported_devices = _get_fused_kernels_supported_devices()
    foreach_supported_devices = _get_foreach_kernels_supported_devices()
    fused = use_fused and all(
        p is None
        or (
            type(p) in _foreach_supported_types
            and p.device.type in fused_supported_devices
            and torch.is_floating_point(p)
        )
        for p in params
    )
    foreach = not fused and all(
        p is None
        or (
            type(p) in _foreach_supported_types
            and p.device.type in foreach_supported_devices
        )
        for p in params
    )
    return fused, foreach


def _device_dtype_check_for_fused(
    p: torch.Tensor, cuda_unsupported: bool = False
) -> None:
    fused_supported_devices = _get_fused_kernels_supported_devices()
    if cuda_unsupported:
        fused_supported_devices.remove("cuda")
    if not (p.device.type in fused_supported_devices and torch.is_floating_point(p)):
        raise RuntimeError(
            "`fused=True` requires all the params to be floating point Tensors of "
            f"supported devices: {fused_supported_devices} but {p.dtype} and {p.device.type}"
        )


def _view_as_real(params, *state_and_grads):
    for i, p in enumerate(params):
        if torch.is_complex(p):
            params[i] = torch.view_as_real(params[i])
            for s in state_and_grads:
                s[i] = torch.view_as_real(s[i])


def _get_scalar_dtype(is_fused=None):
    if is_fused:
        return torch.float32
    return (
        torch.float64 if torch.get_default_dtype() == torch.float64 else torch.float32
    )


def _get_capturable_supported_devices(supports_xla: bool = True) -> List[str]:
    r"""Return the device type list that supports capturable optimizer."""
    capturable_supported_devices = ["cuda", "xpu", "hpu"]
    if not torch.jit.is_scripting():
        capturable_supported_devices.append(torch._C._get_privateuse1_backend_name())
    if supports_xla:
        capturable_supported_devices.append("xla")
    return capturable_supported_devices
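# Illustrative sketch (not part of the original module): how a derived optimizer
# might use the helper above to pick a default implementation when the user left
# foreach and fused unset. The parameter list here is made up for demonstration.
def _example_pick_default_impl() -> Tuple[bool, bool]:
    params = [torch.nn.Parameter(torch.randn(2, 2)) for _ in range(3)]
    # Returns (fused, foreach); both are False on CPU-only setups.
    fused, foreach = _default_to_fused_or_foreach(
        params, differentiable=False, use_fused=False
    )
    return fused, foreach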
_foreach_doc = r"""foreach (bool, optional): whether foreach implementation of optimizer
            is used. If unspecified by the user (so foreach is None), we will try to use
            foreach over the for-loop implementation on CUDA, since it is usually
            significantly more performant. Note that the foreach implementation uses
            ~ sizeof(params) more peak memory than the for-loop version due to the intermediates
            being a tensorlist vs just one tensor. If memory is prohibitive, batch fewer
            parameters through the optimizer at a time or switch this flag to False (default: None)"""

_fused_doc = r"""fused (bool, optional): whether the fused implementation is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None)

    .. note:: The foreach and fused implementations are typically faster than the for-loop,
              single-tensor implementation, with fused being theoretically fastest with both
              vertical and horizontal fusion. As such, if the user has not specified either
              flag (i.e., when foreach = fused = None), we will attempt defaulting to the foreach
              implementation when the tensors are all on CUDA. Why not fused? Since the fused
              implementation is relatively new, we want to give it sufficient bake-in time.
              To specify fused, pass True for fused. To force running the for-loop
              implementation, pass False for either foreach or fused."""

_capturable_doc = r"""capturable (bool, optional): whether this instance is safe to
            capture in a CUDA graph. Passing True can impair ungraphed performance,
            so if you don't intend to graph capture this instance, leave it False
            (default: False)"""

_differentiable_doc = r"""differentiable (bool, optional): whether autograd should
            occur through the optimizer step in training. Otherwise, the step()
            function runs in a torch.no_grad() context. Setting to True can impair
            performance, so leave it False if you don't intend to run autograd
            through this instance (default: False)"""

_maximize_doc = r"""maximize (bool, optional): maximize the objective with respect to the
            params, instead of minimizing (default: False)"""
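
# Illustrative sketch (not part of the original module): the flags documented in
# the snippets above are accepted as keyword arguments by the concrete optimizers
# that embed them, e.g. torch.optim.Adam. The values below are arbitrary.
def _example_flag_usage() -> None:
    model = torch.nn.Linear(4, 2)
    opt = torch.optim.Adam(
        model.parameters(),
        lr=1e-3,
        foreach=None,  # let the optimizer choose, see _foreach_doc
        fused=None,  # opt in explicitly with fused=True on supported devices
        capturable=False,
        differentiable=False,
        maximize=False,
    )
    opt.zero_grad()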
def register_optimizer_step_pre_hook(hook: GlobalOptimizerPreHook) -> RemovableHandle:
    r"""Register a pre hook common to all optimizers.

    The hook should have the following signature::

        hook(optimizer, args, kwargs) -> None or modified args and kwargs

    Args:
        hook (Callable): A user defined hook which is registered on all optimizers.

    Returns:
        :class:`torch.utils.hooks.RemovableHandle`:
            a handle that can be used to remove the added hook by calling
            ``handle.remove()``
    """
    handle = hooks.RemovableHandle(_global_optimizer_pre_hooks)
    _global_optimizer_pre_hooks[handle.id] = hook
    return handle
def register_optimizer_step_post_hook(hook: GlobalOptimizerPostHook) -> RemovableHandle:
    r"""Register a post hook common to all optimizers.

    The hook should have the following signature::

        hook(optimizer, args, kwargs) -> None

    Args:
        hook (Callable): A user defined hook which is registered on all optimizers.

    Returns:
        :class:`torch.utils.hooks.RemovableHandle`:
            a handle that can be used to remove the added hook by calling
            ``handle.remove()``
    """
    handle = hooks.RemovableHandle(_global_optimizer_post_hooks)
    _global_optimizer_post_hooks[handle.id] = hook
    return handle
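
# Illustrative sketch (not part of the original module): registering the two
# global hooks defined above. They fire around every optimizer's step(); keep
# the handles to remove them later.
def _example_global_step_hooks() -> None:
    def log_before(optimizer: "Optimizer", args: Args, kwargs: Kwargs) -> None:
        print(f"about to step {type(optimizer).__name__}")

    def log_after(optimizer: "Optimizer", args: Args, kwargs: Kwargs) -> None:
        print(f"finished step of {type(optimizer).__name__}")

    pre_handle = register_optimizer_step_pre_hook(log_before)
    post_handle = register_optimizer_step_post_hook(log_after)
    # ... run training ...
    pre_handle.remove()
    post_handle.remove()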
ParamsT: TypeAlias = Union[Iterable[torch.Tensor], Iterable[Dict[str, Any]]]

_P = ParamSpec("_P")
R = TypeVar("R")
T = TypeVar("T")


class Optimizer:
    r"""Base class for all optimizers.

    .. warning::
        Parameters need to be specified as collections that have a deterministic
        ordering that is consistent between runs. Examples of objects that don't
        satisfy those properties are sets and iterators over values of dictionaries.

    Args:
        params (iterable): an iterable of :class:`torch.Tensor` s or
            :class:`dict` s. Specifies what Tensors should be optimized.
        defaults (dict): a dict containing default values of optimization
            options (used when a parameter group doesn't specify them).
    """

    OptimizerPreHook: TypeAlias = Callable[[Self, Args, Kwargs], Optional[Tuple[Args, Kwargs]]]  # type: ignore[misc]
    OptimizerPostHook: TypeAlias = Callable[[Self, Args, Kwargs], None]  # type: ignore[misc]

    _optimizer_step_pre_hooks: Dict[int, OptimizerPreHook]
    _optimizer_step_post_hooks: Dict[int, OptimizerPostHook]
    _optimizer_state_dict_pre_hooks: 'OrderedDict[int, Callable[["Optimizer"], None]]'
    _optimizer_state_dict_post_hooks: 'OrderedDict[int, Callable[["Optimizer", StateDict], Optional[StateDict]]]'
    _optimizer_load_state_dict_pre_hooks: 'OrderedDict[int, Callable[["Optimizer", StateDict], Optional[StateDict]]]'
    _optimizer_load_state_dict_post_hooks: 'OrderedDict[int, Callable[["Optimizer"], None]]'

    def __init__(self, params: ParamsT, defaults: Dict[str, Any]) -> None:
        torch._C._log_api_usage_once("python.optimizer")
        self.defaults = defaults
        self._optimizer_step_pre_hooks = OrderedDict()
        self._optimizer_step_post_hooks = OrderedDict()
        self._optimizer_state_dict_pre_hooks = OrderedDict()
        self._optimizer_state_dict_post_hooks = OrderedDict()
        self._optimizer_load_state_dict_pre_hooks = OrderedDict()
        self._optimizer_load_state_dict_post_hooks = OrderedDict()

        self._patch_step_function()

        if isinstance(params, torch.Tensor):
            raise TypeError(
                "params argument given to the optimizer should be an iterable of "
                f"Tensors or dicts, but got {torch.typename(params)}"
            )

        self.state: DefaultDict[torch.Tensor, Any] = defaultdict(dict)
        self.param_groups: List[Dict[str, Any]] = []

        param_groups = list(params)
        if len(param_groups) == 0:
            raise ValueError("optimizer got an empty parameter list")
        if not isinstance(param_groups[0], dict):
            param_groups = [{"params": param_groups}]

        for param_group in param_groups:
            self.add_param_group(cast(dict, param_group))

        # Allows _cuda_graph_capture_health_check to warn only once per instance.
        # https://github.com/pytorch/pytorch/issues/101468
        self._warned_capturable_if_run_uncaptured = True

    def __getstate__(self) -> Dict[str, Any]:
        return {
            "defaults": self.defaults,
            "state": self.state,
            "param_groups": self.param_groups,
        }

    def __setstate__(self, state: Dict[str, Any]) -> None:
        self.__dict__.update(state)
        if "_optimizer_step_pre_hooks" not in self.__dict__:
            self._optimizer_step_pre_hooks = OrderedDict()
        if "_optimizer_step_post_hooks" not in self.__dict__:
            self._optimizer_step_post_hooks = OrderedDict()
        if "_optimizer_state_dict_pre_hooks" not in self.__dict__:
            self._optimizer_state_dict_pre_hooks = OrderedDict()
        if "_optimizer_state_dict_post_hooks" not in self.__dict__:
            self._optimizer_state_dict_post_hooks = OrderedDict()
        if "_optimizer_load_state_dict_pre_hooks" not in self.__dict__:
            self._optimizer_load_state_dict_pre_hooks = OrderedDict()
        if "_optimizer_load_state_dict_post_hooks" not in self.__dict__:
            self._optimizer_load_state_dict_post_hooks = OrderedDict()
        self._patch_step_function()  # To support multiprocessing pickle/unpickle
        self.defaults.setdefault("differentiable", False)

    def __repr__(self) -> str:
        format_string = self.__class__.__name__ + " ("
        for i, group in enumerate(self.param_groups):
            format_string += "\n"
            format_string += f"Parameter Group {i}\n"
            for key in sorted(group.keys()):
                if key != "params":
                    format_string += f"    {key}: {group[key]}\n"
        format_string += ")"
        return format_string

    def _cuda_graph_capture_health_check(self) -> None:
        # When compiling, inductor decides whether cudagraphs can be enabled, so
        # the eager-mode checks below are skipped in that case.
        if (
            not is_compiling()
            and torch.backends.cuda.is_built()
            and torch.cuda.is_available()
        ):
            capturing = torch.cuda.is_current_stream_capturing()

            if capturing and not all(
                group["capturable"] for group in self.param_groups
            ):
                raise RuntimeError(
                    "Attempting CUDA graph capture of step() for an instance of "
                    + self.__class__.__name__
                    + " but param_groups' capturable is False."
                )

            if (
                not getattr(self, "_warned_capturable_if_run_uncaptured", False)
                and all(group["capturable"] for group in self.param_groups)
                and not capturing
            ):
                warnings.warn(
                    "This instance was constructed with capturable=True or some of all the param_groups came with "
                    "capturable=True, but step() is running without CUDA graph capture. If you never intend to "
                    "graph-capture this instance, capturable=True can impair performance, and you should set "
                    "capturable=False."
                )
                self._warned_capturable_if_run_uncaptured = True
    def _optimizer_step_code(self) -> None:
        """Entry point for `torch.profiler`.

        When python tracing is enabled the profiler will hook into this
        function at the CPython level to inspect the optimizer's parameters and
        param groups. It is called after `step()` since many optimizers
        lazily initialize state.

        This is a workaround due to lack of a proper step hook on the optimizer,
        and will be removed once such a hook exists.
        """

    @staticmethod
    def profile_hook_step(func: Callable[_P, R]) -> Callable[_P, R]:
        @functools.wraps(func)
        def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> R:
            self, *_ = args
            self = cast(Optimizer, self)
            profile_name = f"Optimizer.step#{self.__class__.__name__}.step"
            with torch.autograd.profiler.record_function(profile_name):
                # call optimizer step pre hooks
                for pre_hook in chain(
                    _global_optimizer_pre_hooks.values(),
                    self._optimizer_step_pre_hooks.values(),
                ):
                    result = pre_hook(self, args, kwargs)
                    if result is not None:
                        if isinstance(result, tuple) and len(result) == 2:
                            args, kwargs = result  # type: ignore[assignment]
                        else:
                            raise RuntimeError(
                                f"{func} must return None or a tuple of (new_args, new_kwargs), but got {result}."
                            )

                out = func(*args, **kwargs)
                self._optimizer_step_code()

                # call optimizer step post hooks
                for post_hook in chain(
                    self._optimizer_step_post_hooks.values(),
                    _global_optimizer_post_hooks.values(),
                ):
                    post_hook(self, args, kwargs)

                return out

        return wrapper

    @staticmethod
    def _group_tensors_by_device_and_dtype(
        tensorlistlist: TensorListList,
        with_indices: bool = False,
    ) -> Union[
        Dict[Tuple[None, None], Tuple[TensorListList, Indices]],
        Dict[Tuple[torch.device, torch.dtype], Tuple[TensorListList, Indices]],
    ]:
        """Group a list of lists of tensors by device and dtype.

        Skips this step if we are compiling since this will occur during inductor lowering.
        """
        if is_compiling():
            return {
                (None, None): (tensorlistlist, list(range(len(tensorlistlist[0]))))
            }
        else:
            return _group_tensors_by_device_and_dtype(tensorlistlist, with_indices)  # type: ignore[return-value, arg-type]

    def _patch_step_function(self) -> None:
        self._zero_grad_profile_name = (
            f"Optimizer.zero_grad#{self.__class__.__name__}.zero_grad"
        )
        hooked = getattr(self.__class__.step, "hooked", None)
        if not hooked:
            self.__class__.step = self.profile_hook_step(self.__class__.step)  # type: ignore[method-assign]
            self.__class__.step.hooked = True  # type: ignore[attr-defined]
    def register_step_pre_hook(self, hook: OptimizerPreHook) -> RemovableHandle:
        r"""Register an optimizer step pre hook which will be called before optimizer step.

        It should have the following signature::

            hook(optimizer, args, kwargs) -> None or modified args and kwargs

        The ``optimizer`` argument is the optimizer instance being used. If
        args and kwargs are modified by the pre-hook, then the transformed
        values are returned as a tuple containing the new_args and new_kwargs.

        Args:
            hook (Callable): The user defined hook to be registered.

        Returns:
            :class:`torch.utils.hooks.RemovableHandle`:
                a handle that can be used to remove the added hook by calling
                ``handle.remove()``
        """
        handle = hooks.RemovableHandle(self._optimizer_step_pre_hooks)
        self._optimizer_step_pre_hooks[handle.id] = hook
        return handle
    def register_step_post_hook(self, hook: OptimizerPostHook) -> RemovableHandle:
        r"""Register an optimizer step post hook which will be called after optimizer step.

        It should have the following signature::

            hook(optimizer, args, kwargs) -> None

        The ``optimizer`` argument is the optimizer instance being used.

        Args:
            hook (Callable): The user defined hook to be registered.

        Returns:
            :class:`torch.utils.hooks.RemovableHandle`:
                a handle that can be used to remove the added hook by calling
                ``handle.remove()``
        """
        handle = hooks.RemovableHandle(self._optimizer_step_post_hooks)
        self._optimizer_step_post_hooks[handle.id] = hook
        return handle
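
    # Illustrative sketch (not part of the original class): per-instance step
    # hooks registered through the two methods above, e.g. from user code::
    #
    #     opt = torch.optim.SGD(model.parameters(), lr=0.1)
    #     handle = opt.register_step_pre_hook(
    #         lambda optimizer, args, kwargs: print("stepping", type(optimizer).__name__)
    #     )
    #     ...
    #     handle.remove()  # detach the hook when it is no longer needed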
    def register_state_dict_pre_hook(
        self, hook: Callable[["Optimizer"], None], prepend: bool = False
    ) -> RemovableHandle:
        r"""Register a state dict pre-hook which will be called before :meth:`~torch.optim.Optimizer.state_dict` is called.

        It should have the following signature::

            hook(optimizer) -> None

        The ``optimizer`` argument is the optimizer instance being used.
        The hook will be called with argument ``self`` before calling ``state_dict`` on ``self``.
        The registered hook can be used to perform pre-processing before the ``state_dict``
        call is made.

        Args:
            hook (Callable): The user defined hook to be registered.
            prepend (bool): If True, the provided pre ``hook`` will be fired before
                all the already registered pre-hooks on ``state_dict``. Otherwise,
                the provided ``hook`` will be fired after all the already registered
                pre-hooks. (default: False)

        Returns:
            :class:`torch.utils.hooks.RemovableHandle`:
                a handle that can be used to remove the added hook by calling
                ``handle.remove()``
        """
        handle = hooks.RemovableHandle(self._optimizer_state_dict_pre_hooks)
        self._optimizer_state_dict_pre_hooks[handle.id] = hook
        if prepend:
            self._optimizer_state_dict_pre_hooks.move_to_end(handle.id, last=False)
        return handle
    def register_state_dict_post_hook(
        self,
        hook: Callable[["Optimizer", StateDict], Optional[StateDict]],
        prepend: bool = False,
    ) -> RemovableHandle:
        r"""Register a state dict post-hook which will be called after :meth:`~torch.optim.Optimizer.state_dict` is called.

        It should have the following signature::

            hook(optimizer, state_dict) -> state_dict or None

        The hook will be called with arguments ``self`` and ``state_dict`` after generating
        a ``state_dict`` on ``self``. The hook may modify the state_dict inplace or optionally
        return a new one. The registered hook can be used to perform post-processing
        on the ``state_dict`` before it is returned.

        Args:
            hook (Callable): The user defined hook to be registered.
            prepend (bool): If True, the provided post ``hook`` will be fired before
                all the already registered post-hooks on ``state_dict``. Otherwise,
                the provided ``hook`` will be fired after all the already registered
                post-hooks. (default: False)

        Returns:
            :class:`torch.utils.hooks.RemovableHandle`:
                a handle that can be used to remove the added hook by calling
                ``handle.remove()``
        """
        handle = hooks.RemovableHandle(self._optimizer_state_dict_post_hooks)
        self._optimizer_state_dict_post_hooks[handle.id] = hook
        if prepend:
            self._optimizer_state_dict_post_hooks.move_to_end(handle.id, last=False)
        return handle
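
    # Illustrative sketch (not part of the original class): a post-hook added via
    # register_state_dict_post_hook above can attach extra metadata to the
    # returned dict, e.g.::
    #
    #     def add_version(optimizer, state_dict):
    #         state_dict["_checkpoint_version"] = 1
    #         return state_dict
    #
    #     opt.register_state_dict_post_hook(add_version)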
    @torch._disable_dynamo
    def state_dict(self) -> StateDict:
        r"""Return the state of the optimizer as a :class:`dict`.

        It contains two entries:

        * ``state``: a Dict holding current optimization state. Its content
            differs between optimizer classes, but some common characteristics
            hold. For example, state is saved per parameter, and the parameter
            itself is NOT saved. ``state`` is a Dictionary mapping parameter ids
            to a Dict with state corresponding to each parameter.
        * ``param_groups``: a List containing all parameter groups where each
            parameter group is a Dict. Each parameter group contains metadata
            specific to the optimizer, such as learning rate and weight decay,
            as well as a List of parameter IDs of the parameters in the group.

        NOTE: The parameter IDs may look like indices but they are just IDs
        associating state with param_group. When loading from a state_dict,
        the optimizer will zip the param_group ``params`` (int IDs) and the
        optimizer ``param_groups`` (actual ``nn.Parameter`` s) in order to
        match state WITHOUT additional verification.

        A returned state dict might look something like:

        .. code-block:: text

            {
                'state': {
                    0: {'momentum_buffer': tensor(...), ...},
                    1: {'momentum_buffer': tensor(...), ...},
                    2: {'momentum_buffer': tensor(...), ...},
                    3: {'momentum_buffer': tensor(...), ...}
                },
                'param_groups': [
                    {
                        'lr': 0.01,
                        'weight_decay': 0,
                        ...
                        'params': [0]
                    },
                    {
                        'lr': 0.001,
                        'weight_decay': 0.5,
                        ...
                        'params': [1, 2, 3]
                    }
                ]
            }

        """
        for pre_hook in self._optimizer_state_dict_pre_hooks.values():
            pre_hook(self)

        # Save order indices instead of Tensors
        param_mappings: Dict[int, int] = {}
        start_index = 0

        def pack_group(group: Dict[str, Any]) -> Dict[str, Any]:
            nonlocal start_index
            packed = {k: v for k, v in group.items() if k != "params"}
            param_mappings.update(
                {
                    id(p): i
                    for i, p in enumerate(group["params"], start_index)
                    if id(p) not in param_mappings
                }
            )
            packed["params"] = [param_mappings[id(p)] for p in group["params"]]
            start_index += len(packed["params"])
            return packed

        param_groups = [pack_group(g) for g in self.param_groups]
        # Remap state to use order indices as keys
        packed_state = {
            (param_mappings[id(k)] if isinstance(k, torch.Tensor) else k): v
            for k, v in self.state.items()
        }

        state_dict = {
            "state": packed_state,
            "param_groups": param_groups,
        }

        for post_hook in self._optimizer_state_dict_post_hooks.values():
            hook_result = post_hook(self, state_dict)
            if hook_result is not None:
                state_dict = hook_result
        return state_dict

    @staticmethod
    def _process_value_according_to_param_policy(
        param: torch.Tensor,
        value: torch.Tensor,
        param_id: int,
        param_groups: List[Dict[Any, Any]],
        key: Hashable = None,
    ) -> torch.Tensor:
        # Floating-point state is assumed to always match the dtype of the
        # param, but state['step'] must not be cast
        # (https://github.com/pytorch/pytorch/issues/74424) unless the group is
        # fused or capturable, in which case step lives on the param's device.
        fused = False
        capturable = False
        assert param_groups is not None
        for pg in param_groups:
            if param_id in pg["params"]:
                fused = pg["fused"] if "fused" in pg else False
                capturable = pg["capturable"] if "capturable" in pg else False
                break

        if key == "step":
            if capturable or fused:
                return value.to(dtype=torch.float32, device=param.device)
            return value
        if param.is_floating_point():
            return value.to(dtype=param.dtype, device=param.device)
        return value.to(device=param.device)
    def register_load_state_dict_pre_hook(
        self,
        hook: Callable[["Optimizer", StateDict], Optional[StateDict]],
        prepend: bool = False,
    ) -> RemovableHandle:
        r"""Register a load_state_dict pre-hook which will be called before
        :meth:`~torch.optim.Optimizer.load_state_dict` is called. It should have the
        following signature::

            hook(optimizer, state_dict) -> state_dict or None

        The ``optimizer`` argument is the optimizer instance being used and the
        ``state_dict`` argument is a shallow copy of the ``state_dict`` the user
        passed in to ``load_state_dict``. The hook may modify the state_dict inplace
        or optionally return a new one. If a state_dict is returned, it will be
        loaded into the optimizer instead of the original one.

        The hook will be called with argument ``self`` and ``state_dict`` before
        calling ``load_state_dict`` on ``self``. The registered hook can be used to
        perform pre-processing before the ``load_state_dict`` call is made.

        Args:
            hook (Callable): The user defined hook to be registered.
            prepend (bool): If True, the provided pre ``hook`` will be fired before
                all the already registered pre-hooks on ``load_state_dict``. Otherwise,
                the provided ``hook`` will be fired after all the already registered
                pre-hooks. (default: False)

        Returns:
            :class:`torch.utils.hooks.RemovableHandle`:
                a handle that can be used to remove the added hook by calling
                ``handle.remove()``
        """
        handle = hooks.RemovableHandle(self._optimizer_load_state_dict_pre_hooks)
        self._optimizer_load_state_dict_pre_hooks[handle.id] = hook
        if prepend:
            self._optimizer_load_state_dict_pre_hooks.move_to_end(handle.id, last=False)
        return handle
    def register_load_state_dict_post_hook(
        self, hook: Callable[["Optimizer"], None], prepend: bool = False
    ) -> RemovableHandle:
        r"""Register a load_state_dict post-hook which will be called after
        :meth:`~torch.optim.Optimizer.load_state_dict` is called. It should have the
        following signature::

            hook(optimizer) -> None

        The ``optimizer`` argument is the optimizer instance being used.

        The hook will be called with argument ``self`` after calling
        ``load_state_dict`` on ``self``. The registered hook can be used to
        perform post-processing after ``load_state_dict`` has loaded the
        ``state_dict``.

        Args:
            hook (Callable): The user defined hook to be registered.
            prepend (bool): If True, the provided post ``hook`` will be fired before
                all the already registered post-hooks on ``load_state_dict``. Otherwise,
                the provided ``hook`` will be fired after all the already registered
                post-hooks. (default: False)

        Returns:
            :class:`torch.utils.hooks.RemovableHandle`:
                a handle that can be used to remove the added hook by calling
                ``handle.remove()``
        """
        handle = hooks.RemovableHandle(self._optimizer_load_state_dict_post_hooks)
        self._optimizer_load_state_dict_post_hooks[handle.id] = hook
        if prepend:
            self._optimizer_load_state_dict_post_hooks.move_to_end(handle.id, last=False)
        return handle

    @torch._disable_dynamo
    def load_state_dict(self, state_dict: StateDict) -> None:
        r"""Load the optimizer state.

        Args:
            state_dict (dict): optimizer state. Should be an object returned
                from a call to :meth:`state_dict`.
        """
        # shallow copy, to be consistent with module API
        state_dict = state_dict.copy()

        for pre_hook in self._optimizer_load_state_dict_pre_hooks.values():
            hook_result = pre_hook(self, state_dict)
            if hook_result is not None:
                state_dict = hook_result

        # Validate the state_dict
        groups = self.param_groups

        # Deepcopy as we write into saved_groups later to update state
        saved_groups = deepcopy(state_dict["param_groups"])

        if len(groups) != len(saved_groups):
            raise ValueError(
                "loaded state dict has a different number of parameter groups"
            )
        param_lens = (len(g["params"]) for g in groups)
        saved_lens = (len(g["params"]) for g in saved_groups)
        if any(p_len != s_len for p_len, s_len in zip(param_lens, saved_lens)):
            raise ValueError(
                "loaded state dict contains a parameter group "
                "that doesn't match the size of optimizer's group"
            )

        # Update the state
        id_map = dict(
            zip(
                chain.from_iterable(g["params"] for g in saved_groups),
                chain.from_iterable(g["params"] for g in groups),
            )
        )

        def _cast(param, value, param_id=None, param_groups=None, key=None):
            r"""Make a deep copy of value, casting all tensors to device of param."""
            if isinstance(value, torch.Tensor):
                return Optimizer._process_value_according_to_param_policy(
                    param, value, param_id, param_groups, key
                )
            elif isinstance(value, dict):
                return {
                    k: _cast(
                        param, v, param_id=param_id, param_groups=param_groups, key=k
                    )
                    for k, v in value.items()
                }
            elif isinstance(value, Iterable):
                return type(value)(
                    _cast(param, v, param_id=param_id, param_groups=param_groups)
                    for v in value
                )  # type: ignore[call-arg]
            else:
                return value

        # Copy state assigned to params (and cast tensors to appropriate types).
        # State that is not assigned to params is copied as is (needed for
        # backward compatibility).
        state: DefaultDict[torch.Tensor, Dict[Any, Any]] = defaultdict(dict)
        for k, v in state_dict["state"].items():
            if k in id_map:
                param = id_map[k]
                state[param] = _cast(
                    param, v, param_id=k, param_groups=state_dict["param_groups"]
                )
            else:
                state[k] = v

        # Update parameter groups, setting their 'params' value
        def update_group(
            group: Dict[str, Any], new_group: Dict[str, Any]
        ) -> Dict[str, Any]:
            new_group["params"] = group["params"]
            return new_group

        param_groups = [update_group(g, ng) for g, ng in zip(groups, saved_groups)]
        self.__setstate__({"state": state, "param_groups": param_groups})

        for post_hook in self._optimizer_load_state_dict_post_hooks.values():
            post_hook(self)
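
    # Illustrative sketch (not part of the original class): a typical checkpoint
    # round trip through state_dict/load_state_dict from user code::
    #
    #     torch.save({"model": model.state_dict(), "opt": opt.state_dict()}, "ckpt.pt")
    #     ...
    #     ckpt = torch.load("ckpt.pt")
    #     model.load_state_dict(ckpt["model"])
    #     opt.load_state_dict(ckpt["opt"])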
    @torch._disable_dynamo
    def zero_grad(self, set_to_none: bool = True) -> None:
        r"""Reset the gradients of all optimized :class:`torch.Tensor` s.

        Args:
            set_to_none (bool): instead of setting to zero, set the grads to None.
                This will in general have lower memory footprint, and can modestly improve performance.
                However, it changes certain behaviors. For example:
                1. When the user tries to access a gradient and perform manual ops on it,
                a None attribute or a Tensor full of 0s will behave differently.
                2. If the user requests ``zero_grad(set_to_none=True)`` followed by a backward pass, ``.grad``\ s
                are guaranteed to be None for params that did not receive a gradient.
                3. ``torch.optim`` optimizers have a different behavior if the gradient is 0 or None
                (in one case it does the step with a gradient of 0 and in the other it skips
                the step altogether).
        """
        foreach = self.defaults.get("foreach", False) or self.defaults.get(
            "fused", False
        )

        if not hasattr(self, "_zero_grad_profile_name"):
            self._patch_step_function()

        per_device_and_dtype_grads: Optional[
            DefaultDict[torch.device, DefaultDict[torch.dtype, List[torch.Tensor]]]
        ]
        if foreach:
            per_device_and_dtype_grads = defaultdict(lambda: defaultdict(list))
        else:
            per_device_and_dtype_grads = None

        with torch.autograd.profiler.record_function(self._zero_grad_profile_name):
            for group in self.param_groups:
                for p in group["params"]:
                    if p.grad is not None:
                        if set_to_none:
                            p.grad = None
                        else:
                            if p.grad.grad_fn is not None:
                                p.grad.detach_()
                            else:
                                p.grad.requires_grad_(False)
                            if not foreach or p.grad.is_sparse:
                                p.grad.zero_()
                            else:
                                assert per_device_and_dtype_grads is not None
                                per_device_and_dtype_grads[p.grad.device][
                                    p.grad.dtype
                                ].append(p.grad)
            if foreach:
                assert per_device_and_dtype_grads is not None
                for per_dtype_grads in per_device_and_dtype_grads.values():
                    for grads in per_dtype_grads.values():
                        torch._foreach_zero_(grads)
    @overload
    def step(self, closure: None = ...) -> None:
        ...

    @overload
    def step(self, closure: Callable[[], float]) -> float:
        ...

    def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float]:
        r"""Perform a single optimization step to update parameters.

        Args:
            closure (Callable): A closure that reevaluates the model and
                returns the loss. Optional for most optimizers.

        .. note::
            Unless otherwise specified, this function should not modify the
            ``.grad`` field of the parameters.
        """
        raise NotImplementedError
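
    # Illustrative sketch (not part of the original class): the usual interplay
    # of zero_grad and step in a training loop, where step is provided by a
    # concrete subclass::
    #
    #     for inputs, targets in loader:
    #         opt.zero_grad()
    #         loss = loss_fn(model(inputs), targets)
    #         loss.backward()
    #         opt.step()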
    @torch._disable_dynamo
    def add_param_group(self, param_group: Dict[str, Any]) -> None:
        r"""Add a param group to the :class:`Optimizer` s `param_groups`.

        This can be useful when fine tuning a pre-trained network as frozen layers can be made
        trainable and added to the :class:`Optimizer` as training progresses.

        Args:
            param_group (dict): Specifies what Tensors should be optimized along with group
                specific optimization options.
        """
        if not isinstance(param_group, dict):
            raise TypeError(f"param_group must be a dict, but got {type(param_group)}")

        params = param_group["params"]
        if isinstance(params, torch.Tensor):
            param_group["params"] = [params]
        elif isinstance(params, set):
            raise TypeError(
                "optimizer parameters need to be organized in ordered collections, but "
                "the ordering of tensors in sets will change between runs. Please use a list instead."
            )
        else:
            param_group["params"] = list(params)

        for param in param_group["params"]:
            if not isinstance(param, torch.Tensor):
                raise TypeError(
                    "optimizer can only optimize Tensors, "
                    "but one of the params is " + torch.typename(param)
                )
            if not self.defaults.get("differentiable", None) and not (
                param.is_leaf or param.retains_grad
            ):
                raise ValueError("can't optimize a non-leaf Tensor")

        for name, default in self.defaults.items():
            if default is required and name not in param_group:
                raise ValueError(
                    "parameter group didn't specify a value of required "
                    f"optimization parameter {name}"
                )
            else:
                param_group.setdefault(name, default)

        params = param_group["params"]
        if len(params) != len(set(params)):
            warnings.warn(
                "optimizer contains a parameter group with duplicate parameters; "
                "in future, this will cause an error; "
                "see github.com/pytorch/pytorch/issues/40967 for more information",
                stacklevel=3,
            )

        param_set: Set[torch.Tensor] = set()
        for group in self.param_groups:
            param_set.update(set(group["params"]))

        if not param_set.isdisjoint(set(param_group["params"])):
            raise ValueError("some parameters appear in more than one parameter group")

        self.param_groups.append(param_group)
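
# Illustrative sketch (not part of the original module): a minimal Optimizer
# subclass showing the contract described above: per-parameter defaults, a
# step implementation, and add_param_group for a second learning rate. The
# class and helper names below are hypothetical and exist only for demonstration.
class _ExamplePlainSGD(Optimizer):
    def __init__(self, params: ParamsT, lr: float = 1e-2) -> None:
        super().__init__(params, defaults={"lr": lr})

    @torch.no_grad()
    def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float]:
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()
        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is not None:
                    # Plain gradient descent: p <- p - lr * grad
                    p.add_(p.grad, alpha=-group["lr"])
        return loss


def _example_custom_optimizer_usage() -> None:
    backbone = torch.nn.Linear(4, 4)
    head = torch.nn.Linear(4, 2)
    opt = _ExamplePlainSGD(backbone.parameters(), lr=1e-2)
    # A fine-tuning-style second group with its own learning rate.
    opt.add_param_group({"params": list(head.parameters()), "lr": 1e-3})

    x = torch.randn(8, 4)
    loss = head(backbone(x)).pow(2).mean()
    opt.zero_grad()
    loss.backward()
    opt.step()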
6 >+A o (,C  ( 8ELL18DcN3KKL Lt_CLCL^. ^.r2   