
r"""
This package adds support for CUDA tensor types.

It implements the same functions as CPU tensors, but they utilize
GPUs for computation.

It is lazily initialized, so you can always import it, and use
:func:`is_available()` to determine if your system supports CUDA.

:ref:`cuda-semantics` has more details about working with CUDA.
"""
import importlib
import os
import threading
import traceback
import warnings
from functools import lru_cache
from typing import Any, Callable, cast, List, Optional, Tuple, Union

import torch
import torch._C
from torch import device as _device
from torch._utils import _dummy_type, _LazySeedTracker, classproperty
from torch.types import Device

from . import gds
from ._utils import _get_device_index
from .graphs import (
    CUDAGraph,
    graph,
    graph_pool_handle,
    is_current_stream_capturing,
    make_graphed_callables,
)
from .streams import Event, ExternalStream, Stream

try:
    from torch._C import _cudart  # type: ignore[attr-defined]
except ImportError:
    _cudart = None

_initialized = False
_tls = threading.local()
_initialization_lock = threading.Lock()
_queued_calls: List[
    Tuple[Callable[[], None], List[str]]
] = []  # don't invoke these until initialization occurs
_is_in_bad_fork = getattr(torch._C, "_cuda_isInBadFork", lambda: False)
_device_t = Union[_device, str, int, None]

_HAS_PYNVML = False
_PYNVML_ERR = None
try:
    from torch import version as _version

    try:
        if not _version.hip:
            import pynvml  # type: ignore[import]
        else:
            import amdsmi  # type: ignore[import]

        _HAS_PYNVML = True
    except ModuleNotFoundError:
        pass
    finally:
        del _version
except ImportError as err:
    _PYNVML_ERR = err  # sometimes a lib is installed but the import fails for some reason

_lazy_seed_tracker = _LazySeedTracker()

# Define dummy _CudaDeviceProperties type if PyTorch was compiled without CUDA
if hasattr(torch._C, "_CudaDeviceProperties"):
    _CudaDeviceProperties = torch._C._CudaDeviceProperties
else:
    _CudaDeviceProperties = _dummy_type("_CudaDeviceProperties")  # type: ignore[assignment, misc]

if hasattr(torch._C, "_cuda_exchangeDevice"):
    _exchange_device = torch._C._cuda_exchangeDevice
else:

    def _exchange_device(device: int) -> int:
        if device < 0:
            return -1
        raise RuntimeError("PyTorch was compiled without CUDA support")


if hasattr(torch._C, "_cuda_maybeExchangeDevice"):
    _maybe_exchange_device = torch._C._cuda_maybeExchangeDevice
else:

    def _maybe_exchange_device(device: int) -> int:
        if device < 0:
            return -1
        raise RuntimeError("PyTorch was compiled without CUDA support")


has_half: bool = True
has_magma: bool = torch._C._has_magma

default_generators: Tuple[torch._C.Generator] = ()  # type: ignore[assignment]


def _is_compiled() -> bool:
    r"""Return true if compile with CUDA support."""
    return hasattr(torch._C, "_cuda_getDeviceCount")


def _nvml_based_avail() -> bool:
    return os.getenv("PYTORCH_NVML_BASED_CUDA_CHECK") == "1"


def is_available() -> bool:
    r"""Return a bool indicating if CUDA is currently available."""
    if not _is_compiled():
        return False
    if _nvml_based_avail():
        # The user has requested an availability check based on NVML, which avoids
        # fork poisoning at the cost of a weaker assessment. If NVML discovery or
        # initialization fails, this falls back to the CUDA Runtime API check.
        return device_count() > 0
    else:
        # The default availability inspection never throws and returns 0 if the
        # driver is missing or can't be initialized
        return torch._C._cuda_getDeviceCount() > 0


def is_bf16_supported(including_emulation: bool = True):
    r"""Return a bool indicating if the current CUDA/ROCm device supports dtype bfloat16."""
    # Check for ROCm: bfloat16 is supported on AMD GPU archs, no version check needed.
    if torch.version.hip:
        return True

    # If CUDA is not available, then it does not support bf16 either.
    if not is_available():
        return False

    device = torch.cuda.current_device()

    # Check for CUDA version and device compute capability.
    # This is a fast way to check for it.
    cuda_version = torch.version.cuda
    if (
        cuda_version is not None
        and int(cuda_version.split(".")[0]) >= 11
        and torch.cuda.get_device_properties(device).major >= 8
    ):
        return True

    if not including_emulation:
        return False

    # Finally try to create a bfloat16 device.
    return _check_bf16_tensor_supported(device)


@lru_cache(maxsize=16)
def _check_bf16_tensor_supported(device: _device_t):
    try:
        torch.tensor([1.0], dtype=torch.bfloat16, device=device)
        return True
    except Exception:
        return False


def _sleep(cycles):
    torch._C._cuda_sleep(cycles)


def _extract_arch_version(arch_string: str):
    """Extracts the architecture string from a CUDA version"""
    base = arch_string.split("_")[1]
    if base.endswith("a"):
        base = base[:-1]
    return int(base)
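
# A minimal availability-gating sketch (illustrative only, not part of the
# module): the package can always be imported, and `is_available()` decides
# at runtime whether CUDA work is possible. The shape and the "cuda:0"
# string below are arbitrary example values.
#
#   >>> import torch
#   >>> if torch.cuda.is_available():
#   ...     dev = torch.device("cuda:0")
#   ... else:
#   ...     dev = torch.device("cpu")
#   >>> x = torch.ones(4, device=dev)  # lands on GPU 0 only when CUDA works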
 xx,,.22r"   including_emulationc                 x   t         j                  j                  ryt               syt         j                  j                         }t         j                  j                  }|Mt        |j                  d      d         dk\  r-t         j                  j                  |      j                  dk\  ry| syt        |      S )zQReturn a bool indicating if the current CUDA/ROCm device supports dtype bfloat16.TF.r         )r9   r%   hiprD   cudacurrent_deviceintsplitget_device_propertiesmajor_check_bf16_tensor_supported)rE   r   cuda_versions      r#   is_bf16_supportedrS      s     }} >ZZ&&(F ==%%L ""3'*+r1JJ,,V4::a? (//r"      )maxsizec                 r    	 t        j                  dgt         j                  |        y# t        $ r Y yw xY w)Ng      ?)dtyper   TF)r9   tensorbfloat16	Exceptionr   s    r#   rQ   rQ      s2    cU%..@ s   '* 	66c                 B    t         j                  j                  |        y N)r9   r:   _cuda_sleep)cycless    r#   _sleepr_      s    	HH r"   arch_stringc                 l    | j                  d      d   }|j                  d      r|dd }t        |      S )z4Extracts the architecture string from a CUDA version_r   aNr+   )rN   endswithrM   )r`   bases     r#   _extract_arch_versionrf      s8    S!!$D}}SCRyt9r"   c                     d} d}t         j                  j                  t         j                  j	                         }t        t                     D ]  }t        |      }|d   }|d   }t        |      }|dz  |z   }t        d t         j                  j                         D        d      }	||	k  sat        j                  ||||||	dz  |	dz  fz          y y )	Nz
    Found GPU%d %s which requires CUDA_VERSION >= %d to
     work properly, but your PyTorch was compiled
     with CUDA_VERSION %d. Please install the correct PyTorch binary
     using instructions from https://pytorch.org
    """  # noqa: F841

    old_gpu_warn = """
    Found GPU%d %s which is of cuda capability %d.%d.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability supported by this library is %d.%d.
    """

    if torch.version.cuda is not None:  # on ROCm we don't want this check
        CUDA_VERSION = torch._C._cuda_getCompiledVersion()  # noqa: F841
        for d in range(device_count()):
            capability = get_device_capability(d)
            major = capability[0]
            minor = capability[1]
            name = get_device_name(d)
            current_arch = major * 10 + minor
            min_arch = min(
                (_extract_arch_version(arch) for arch in torch.cuda.get_arch_list()),
                default=35,
            )
            if current_arch < min_arch:
                warnings.warn(
                    old_gpu_warn % (d, name, major, minor, min_arch // 10, min_arch % 10)
                )


def _check_cubins():
    incompatible_device_warn = """
{} with CUDA capability sm_{} is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities {}.
If you want to use the {} GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
"""
    if torch.version.cuda is None:  # on ROCm we don't want this check
        return
    arch_list = get_arch_list()
    if len(arch_list) == 0:
        return
    supported_sm = [_extract_arch_version(arch) for arch in arch_list if "sm_" in arch]
    for idx in range(device_count()):
        cap_major, cap_minor = get_device_capability(idx)
        # NVIDIA GPU compute architectures are backward compatible within major version
        supported = any(sm // 10 == cap_major for sm in supported_sm)
        if not supported:
            device_name = get_device_name(idx)
            capability = cap_major * 10 + cap_minor
            warnings.warn(
                incompatible_device_warn.format(
                    device_name, capability, " ".join(arch_list), device_name
                )
            )


def is_initialized():
    r"""Return whether PyTorch's CUDA state has been initialized."""
    return _initialized and not _is_in_bad_fork()


def _lazy_call(callable, **kwargs):
    if is_initialized():
        callable()
    else:
        global _lazy_seed_tracker
        if kwargs.get("seed_all", False):
            _lazy_seed_tracker.queue_seed_all(callable, traceback.format_stack())
        elif kwargs.get("seed", False):
            _lazy_seed_tracker.queue_seed(callable, traceback.format_stack())
        else:
            # Don't store the actual traceback to avoid memory cycle
            _queued_calls.append((callable, traceback.format_stack()))


_lazy_call(_check_capability)
_lazy_call(_check_cubins)


class DeferredCudaCallError(Exception):
    pass


OutOfMemoryError = torch._C.OutOfMemoryError


def init():
    r"""Initialize PyTorch's CUDA state.

    You may need to call this explicitly if you are interacting with
    PyTorch via its C API, as Python bindings for CUDA functionality
    will not be available until this initialization takes place.
    Ordinary users should not need this, as all of PyTorch's CUDA methods
    automatically initialize CUDA state on-demand.

    Does nothing if the CUDA state is already initialized.
    """
    _lazy_init()


def _lazy_init():
    global _initialized, _queued_calls
    if is_initialized() or hasattr(_tls, "is_initializing"):
        return
    with _initialization_lock:
        # Double-checked locking: this is OK because the test above was
        # GIL protected anyway. The inner test is for when a thread blocked
        # on some other thread that was doing the initialization; when it
        # gets the lock, it will find there is nothing left to do.
        if is_initialized():
            return
        # It is important to prevent other threads from entering _lazy_init
        # immediately while we still hold the GIL, because some of the C calls
        # made below will release it.
        if _is_in_bad_fork():
            raise RuntimeError(
                "Cannot re-initialize CUDA in forked subprocess. To use CUDA with "
                "multiprocessing, you must use the 'spawn' start method"
            )
        if not hasattr(torch._C, "_cuda_getDeviceCount"):
            raise AssertionError("Torch not compiled with CUDA enabled")
        if _cudart is None:
            raise AssertionError(
                "libcudart functions unavailable. It looks like you have a broken build?"
            )
        # This function throws if there's a driver initialization error, no GPUs
        # are found, or any other error occurs.
        if "CUDA_MODULE_LOADING" not in os.environ:
            os.environ["CUDA_MODULE_LOADING"] = "LAZY"
        torch._C._cuda_init()
        # Some of the queued calls may reentrantly call _lazy_init(); we need to
        # just return without initializing in that case. However, we must not
        # let any *other* threads in!
        _tls.is_initializing = True

        for calls in _lazy_seed_tracker.get_calls():
            if calls:
                _queued_calls.append(calls)

        try:
            for queued_call, orig_traceback in _queued_calls:
                try:
                    queued_call()
                except Exception as e:
                    msg = (
                        f"CUDA call failed lazily at initialization with error: {str(e)}\n\n"
                        f"CUDA call was originally invoked at:\n\n{''.join(orig_traceback)}"
                    )
                    raise DeferredCudaCallError(msg) from e
        finally:
            delattr(_tls, "is_initializing")
        _initialized = True


def cudart():
    r"""Retrieves the CUDA runtime API module.


    This function initializes the CUDA runtime environment if it is not already
    initialized and returns the CUDA runtime API module (_cudart). The CUDA
    runtime API module provides access to various CUDA runtime functions.

    Args:
        ``None``

    Returns:
        module: The CUDA runtime API module (_cudart).

    Raises:
        RuntimeError: If CUDA cannot be re-initialized in a forked subprocess.
        AssertionError: If PyTorch is not compiled with CUDA support or if libcudart functions are unavailable.

    Example of CUDA operations with profiling:
        >>> import torch
        >>> from torch.cuda import cudart, check_error
        >>> import os
        >>>
        >>> os.environ['CUDA_PROFILE'] = '1'
        >>>
        >>> def perform_cuda_operations_with_streams():
        >>>     stream = torch.cuda.Stream()
        >>>     with torch.cuda.stream(stream):
        >>>         x = torch.randn(100, 100, device='cuda')
        >>>         y = torch.randn(100, 100, device='cuda')
        >>>         z = torch.mul(x, y)
        >>>     return z
        >>>
        >>> torch.cuda.synchronize()
        >>> print("====== Start nsys profiling ======")
        >>> check_error(cudart().cudaProfilerStart())
        >>> with torch.autograd.profiler.emit_nvtx():
        >>>     result = perform_cuda_operations_with_streams()
        >>>     print("CUDA operations completed.")
        >>> check_error(torch.cuda.cudart().cudaProfilerStop())
        >>> print("====== End nsys profiling ======")

    To run this example and save the profiling information, execute:
        >>> $ nvprof --profile-from-start off --csv --print-summary -o trace_name.prof -f -- python cudart_test.py

    This command profiles the CUDA operations in the provided script and saves
    the profiling information to a file named `trace_name.prof`.
    The `--profile-from-start off` option ensures that profiling starts only
    after the `cudaProfilerStart` call in the script.
    The `--csv` and `--print-summary` options format the profiling output as a
    CSV file and print a summary, respectively.
    The `-o` option specifies the output file name, and the `-f` option forces the
    overwrite of the output file if it already exists.
    """
    _lazy_init()
    return _cudart


class cudaStatus:
    SUCCESS: int = 0
    ERROR_NOT_READY: int = 34


class CudaError(RuntimeError):
    def __init__(self, code: int) -> None:
        msg = _cudart.cudaGetErrorString(_cudart.cudaError(code))
        super().__init__(f"{msg} ({code})")


def check_error(res: int) -> None:
    if res != _cudart.cudaError.success:
        raise CudaError(res)


class _DeviceGuard:
    def __init__(self, index: int):
        self.idx = index
        self.prev_idx = -1

    def __enter__(self):
        self.prev_idx = torch.cuda._exchange_device(self.idx)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        self.idx = torch.cuda._maybe_exchange_device(self.prev_idx)
        return False


class device:
    r"""Context-manager that changes the selected device.

    Args:
        device (torch.device or int): device index to select. It's a no-op if
            this argument is a negative integer or ``None``.
    """

    def __init__(self, device: Any):
        self.idx = _get_device_index(device, optional=True)
        self.prev_idx = -1

    def __enter__(self):
        self.prev_idx = torch.cuda._exchange_device(self.idx)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        self.idx = torch.cuda._maybe_exchange_device(self.prev_idx)
        return False
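
# A small usage sketch (illustrative, not part of the module): temporarily
# switching the active device with the `device` context manager. Assumes a
# machine with at least two CUDA devices; the indices are example values.
#
#   >>> torch.cuda.set_device(0)
#   >>> with torch.cuda.device(1):
#   ...     y = torch.empty(8, device="cuda")  # allocated on GPU 1
#   >>> torch.cuda.current_device()  # back on GPU 0 after the block exits
#   0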

class device_of(device):
    r"""Context-manager that changes the current device to that of given object.

    You can use both tensors and storages as arguments. If a given object is
    not allocated on a GPU, this is a no-op.

    Args:
        obj (Tensor or Storage): object allocated on the selected device.
    """

    def __init__(self, obj):
        idx = obj.get_device() if obj.is_cuda else -1
        super().__init__(idx)


def set_device(device: _device_t) -> None:
    r"""Set the current device.

    Usage of this function is discouraged in favor of :any:`device`. In most
    cases it's better to use ``CUDA_VISIBLE_DEVICES`` environmental variable.

    Args:
        device (torch.device or int): selected device. This function is a no-op
            if this argument is negative.
    """
    device = _get_device_index(device)
    if device >= 0:
        torch._C._cuda_setDevice(device)

def get_device_name(device: Optional[_device_t] = None) -> str:
    r"""Get the name of a device.

    Args:
        device (torch.device or int or str, optional): device for which to return the
            name. This function is a no-op if this argument is a negative
            integer. It uses the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Returns:
        str: the name of the device
    """
    return get_device_properties(device).name

def get_device_capability(device: Optional[_device_t] = None) -> Tuple[int, int]:
    r"""Get the cuda capability of a device.

    Args:
        device (torch.device or int or str, optional): device for which to return the
            device capability. This function is a no-op if this argument is
            a negative integer. It uses the current device, given by
            :func:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).

    Returns:
        tuple(int, int): the major and minor cuda capability of the device
    """
    prop = get_device_properties(device)
    return prop.major, prop.minor

def get_device_properties(device: _device_t) -> _CudaDeviceProperties:
    r"""Get the properties of a device.

    Args:
        device (torch.device or int or str): device for which to return the
            properties of the device.

    Returns:
        _CudaDeviceProperties: the properties of the device
    """
    _lazy_init()  # will define _get_device_properties
    device = _get_device_index(device, optional=True)
    if device < 0 or device >= device_count():
        raise AssertionError("Invalid device id")
    return _get_device_properties(device)  # type: ignore[name-defined]

def can_device_access_peer(device: _device_t, peer_device: _device_t) -> bool:
    r"""Check if peer access between two devices is possible."""
    _lazy_init()
    device = _get_device_index(device, optional=True)
    peer_device = _get_device_index(peer_device)
    if device < 0 or device >= device_count():
        raise AssertionError("Invalid device id")
    if peer_device < 0 or peer_device >= device_count():
        raise AssertionError("Invalid peer device id")
    return torch._C._cuda_canDeviceAccessPeer(device, peer_device)
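
# An illustrative sketch (not part of the module): inspecting a device before
# relying on peer-to-peer copies. The device indices are example values.
#
#   >>> props = torch.cuda.get_device_properties(0)
#   >>> props.name, props.major, props.minor, props.total_memory  # doctest: +SKIP
#   >>> if torch.cuda.device_count() > 1:
#   ...     p2p_ok = torch.cuda.can_device_access_peer(0, 1)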

class StreamContext:
    r"""Context-manager that selects a given stream.

    All CUDA kernels queued within its context will be enqueued on a selected
    stream.

    Args:
        Stream (Stream): selected stream. This manager is a no-op if it's
            ``None``.
    .. note:: Streams are per-device.
    """

    cur_stream: Optional["torch.cuda.Stream"]

    def __init__(self, stream: Optional["torch.cuda.Stream"]):
        self.stream = stream
        self.idx = _get_device_index(None, True)
        if not torch.jit.is_scripting():
            if self.idx is None:
                self.idx = -1

        self.src_prev_stream = (
            None if not torch.jit.is_scripting() else torch.cuda.default_stream(None)
        )
        self.dst_prev_stream = (
            None if not torch.jit.is_scripting() else torch.cuda.default_stream(None)
        )

    def __enter__(self):
        # Local cur_stream variable for type refinement
        cur_stream = self.stream
        # Return if stream is None or CUDA device not available
        if cur_stream is None or self.idx == -1:
            return
        self.src_prev_stream = torch.cuda.current_stream(None)

        # If the stream is not on the current device, then
        # set the current stream on the device
        if self.src_prev_stream.device != cur_stream.device:
            with device(cur_stream.device):
                self.dst_prev_stream = torch.cuda.current_stream(cur_stream.device)
        torch.cuda.set_stream(cur_stream)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        # Local cur_stream variable for type refinement
        cur_stream = self.stream
        # If stream is None or no CUDA device available, return
        if cur_stream is None or self.idx == -1:
            return

        # Reset the stream on the original device and the destination device
        if self.src_prev_stream.device != cur_stream.device:  # type: ignore[union-attr]
            torch.cuda.set_stream(self.dst_prev_stream)  # type: ignore[arg-type]
        torch.cuda.set_stream(self.src_prev_stream)  # type: ignore[arg-type]

def stream(stream: Optional["torch.cuda.Stream"]) -> StreamContext:
    r"""Wrap around the Context-manager StreamContext that selects a given stream.

    Arguments:
        stream (Stream): selected stream. This manager is a no-op if it's
            ``None``.
    .. note:: In eager mode stream is of type Stream class while in JIT it is
        an object of the custom class ``torch.classes.cuda.Stream``.
    """
    return StreamContext(stream)
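
# A usage sketch (illustrative, not part of the module): issuing work on a
# side stream and ordering later work after it. Assumes an available CUDA
# device; the shapes are example values.
#
#   >>> s = torch.cuda.Stream()
#   >>> with torch.cuda.stream(s):
#   ...     a = torch.randn(1024, 1024, device="cuda")
#   ...     b = a @ a  # enqueued on `s`, not on the default stream
#   >>> torch.cuda.current_stream().wait_stream(s)  # order later work after `s`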

def _set_stream_by_id(stream_id, device_index, device_type):
    r"""set stream specified by the stream id, device index and
        device type

    Args: stream_id (int): stream id in stream pool
          device_index (int): device index in topo
          device_type (int): enum device type
    """
    torch._C._cuda_setStream(
        stream_id=stream_id,
        device_index=device_index,
        device_type=device_type,
    )

def set_stream(stream: Stream):
    r"""Set the current stream. This is a wrapper API to set the stream.
        Usage of this function is discouraged in favor of the ``stream``
        context manager.

    Args:
        stream (Stream): selected stream. This function is a no-op
            if this argument is ``None``.
    """
    if stream is None:
        return
    _set_stream_by_id(
        stream_id=stream.stream_id,
        device_index=stream.device_index,
        device_type=stream.device_type,
    )


def _parse_visible_devices() -> Union[List[int], List[str]]:
    r"""Parse CUDA_VISIBLE_DEVICES environment variable."""
    var = os.getenv("CUDA_VISIBLE_DEVICES")

    if torch.version.hip:
        hip_devices = os.getenv("HIP_VISIBLE_DEVICES")
        if hip_devices is not None:
            var = hip_devices

    if var is None:
        return list(range(64))

    def _strtoul(s: str) -> int:
        """Return -1 or positive integer sequence string starts with."""
        if not s:
            return -1
        for idx, c in enumerate(s):
            if not (c.isdigit() or (idx == 0 and c in "+-")):
                break
            if idx + 1 == len(s):
                idx += 1
        return int(s[:idx]) if idx > 0 else -1

    def parse_list_with_prefix(lst: str, prefix: str) -> List[str]:
        rcs: List[str] = []
        for elem in lst.split(","):
            # Repeated id results in empty set
            if elem in rcs:
                return cast(List[str], [])
            # Anything other but prefix is ignored
            if not elem.startswith(prefix):
                break
            rcs.append(elem)
        return rcs

    if var.startswith("GPU-"):
        return parse_list_with_prefix(var, "GPU-")
    if var.startswith("MIG-"):
        return parse_list_with_prefix(var, "MIG-")
    # CUDA_VISIBLE_DEVICES uses something like strtoul, which makes
    # `1gpu2,2ampere` equivalent to `1,2`
    rc: List[int] = []
    for elem in var.split(","):
        x = _strtoul(elem.strip())
        # Repeated ordinal results in empty set
        if x in rc:
            return cast(List[int], [])
        # Negative value aborts the sequence
        if x < 0:
            break
        rc.append(x)
    return rc


def _raw_device_count_amdsmi() -> int:
    if not _HAS_PYNVML:  # If amdsmi is not available
        return -1
    try:
        amdsmi.amdsmi_init()
    except amdsmi.AmdSmiException as e:
        warnings.warn(f"Can't initialize amdsmi - Error code: {e.err_code}")
        return -1
    socket_handles = amdsmi.amdsmi_get_processor_handles()
    return len(socket_handles)


def _raw_device_count_nvml() -> int:
    r"""Return number of devices as reported by NVML or negative value if NVML discovery/initialization failed."""
    from ctypes import byref, c_int, CDLL

    nvml_h = CDLL("libnvidia-ml.so.1")
    rc = nvml_h.nvmlInit()
    if rc != 0:
        warnings.warn("Can't initialize NVML")
        return -1
    dev_count = c_int(-1)
    rc = nvml_h.nvmlDeviceGetCount_v2(byref(dev_count))
    if rc != 0:
        warnings.warn("Can't get nvml device count")
        return -1
    del nvml_h
    return dev_count.value


def _raw_device_uuid_amdsmi() -> Optional[List[str]]:
    if not _HAS_PYNVML:  # If amdsmi is not available
        return None
    try:
        amdsmi.amdsmi_init()
    except amdsmi.AmdSmiException:
        warnings.warn("Can't initialize amdsmi")
        return None
    try:
        socket_handles = amdsmi.amdsmi_get_processor_handles()
        dev_count = len(socket_handles)
    except amdsmi.AmdSmiException:
        warnings.warn("Can't get amdsmi device count")
        return None
    uuids: List[str] = []
    for idx in range(dev_count):
        try:
            handler = amdsmi.amdsmi_get_processor_handles()[idx]
        except amdsmi.AmdSmiException:
            warnings.warn("Cannot get amd device handler")
            return None
        try:
            uuid = amdsmi.amdsmi_get_gpu_device_uuid(handler)
        except amdsmi.AmdSmiException:
            warnings.warn("Cannot get uuid for amd device")
            return None
        uuids.append(str(uuid))
    return uuids


def _raw_device_uuid_nvml() -> Optional[List[str]]:
    r"""Return list of device UUID as reported by NVML or None if NVML discovery/initialization failed."""
    from ctypes import byref, c_int, c_void_p, CDLL, create_string_buffer

    nvml_h = CDLL("libnvidia-ml.so.1")
    rc = nvml_h.nvmlInit()
    if rc != 0:
        warnings.warn("Can't initialize NVML")
        return None
    dev_count = c_int(-1)
    rc = nvml_h.nvmlDeviceGetCount_v2(byref(dev_count))
    if rc != 0:
        warnings.warn("Can't get nvml device count")
        return None
    uuids: List[str] = []
    for idx in range(dev_count.value):
        dev_id = c_void_p()
        rc = nvml_h.nvmlDeviceGetHandleByIndex_v2(idx, byref(dev_id))
        if rc != 0:
            warnings.warn("Can't get device handle")
            return None
        buf_len = 96
        buf = create_string_buffer(buf_len)
        rc = nvml_h.nvmlDeviceGetUUID(dev_id, buf, buf_len)
        if rc != 0:
            warnings.warn("Can't get device UUID")
            return None
        uuids.append(buf.raw.decode("ascii").strip("\0"))
    del nvml_h
    return uuids


def _transform_uuid_to_ordinals(candidates: List[str], uuids: List[str]) -> List[int]:
    r"""Given the set of partial uuids and list of known uuids builds a set of ordinals excluding ambiguous partials IDs."""

    def uuid_to_orinal(candidate: str, uuids: List[str]) -> int:
        best_match = -1
        for idx, uuid in enumerate(uuids):
            if not uuid.startswith(candidate):
                continue
            # Ambiguous candidate
            if best_match != -1:
                return -1
            best_match = idx
        return best_match

    rc: List[int] = []
    for candidate in candidates:
        idx = uuid_to_orinal(candidate, uuids)
        # First invalid ordinal stops parsing
        if idx < 0:
            break
        # Duplicates result in empty set
        if idx in rc:
            return cast(List[int], [])
        rc.append(idx)
    return rc


def _device_count_amdsmi() -> int:
    visible_devices = _parse_visible_devices()
    if not visible_devices:
        return 0
    try:
        if type(visible_devices[0]) is str:
            return -1
        else:
            raw_cnt = _raw_device_count_amdsmi()
            if raw_cnt <= 0:
                return raw_cnt
            # Trim the list up to a maximum available device
            for idx, val in enumerate(visible_devices):
                if cast(int, val) >= raw_cnt:
                    return idx
    except OSError:
        return -1
    except AttributeError:
        return -1
    return len(visible_devices)


def _device_count_nvml() -> int:
    r"""Return number of devices as reported by NVML taking CUDA_VISIBLE_DEVICES into account.

    Negative value is returned if NVML discovery or initialization has failed.
    """
    visible_devices = _parse_visible_devices()
    if not visible_devices:
        return 0
    try:
        if type(visible_devices[0]) is str:
            # Skip MIG parsing
            if visible_devices[0].startswith("MIG-"):
                return -1
            uuids = _raw_device_uuid_nvml()
            if uuids is None:
                return -1
            visible_devices = _transform_uuid_to_ordinals(
                cast(List[str], visible_devices), uuids
            )
        else:
            raw_cnt = _raw_device_count_nvml()
            if raw_cnt <= 0:
                return raw_cnt
            # Trim the list up to a maximum available device
            for idx, val in enumerate(visible_devices):
                if cast(int, val) >= raw_cnt:
                    return idx
    except OSError:
        return -1
    except AttributeError:
        return -1
    return len(visible_devices)

def _get_nvml_device_index(device: Optional[Union[int, Device]]) -> int:
    r"""Return the NVML index of the device, taking CUDA_VISIBLE_DEVICES into account."""
    idx = _get_device_index(device, optional=True)
    visible_devices = _parse_visible_devices()
    if type(visible_devices[0]) is str:
        uuids = _raw_device_uuid_nvml()
        if uuids is None:
            raise RuntimeError("Can't get device UUIDs")
        visible_devices = _transform_uuid_to_ordinals(
            cast(List[str], visible_devices), uuids
        )
    visible_devices = cast(List[int], visible_devices)
    if idx < 0 or idx >= len(visible_devices):
        raise RuntimeError(
            f"device {idx} is not visible (CUDA_VISIBLE_DEVICES={visible_devices})"
        )
    return visible_devices[idx]


_cached_device_count: Optional[int] = None


def device_count() -> int:
    r"""Return the number of GPUs available."""
    global _cached_device_count
    if not _is_compiled():
        return 0
    if _cached_device_count is not None:
        return _cached_device_count
    # bypass _device_count_nvml() if rocm (not supported)
    nvml_count = _device_count_amdsmi() if torch.version.hip else _device_count_nvml()
    r = torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
    # NB: Do not cache the device count prior to CUDA initialization, because
    # the number of devices can change due to changes to CUDA_VISIBLE_DEVICES
    # setting prior to CUDA initialization.
    if _initialized:
        _cached_device_count = r
    return r
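
# An enumeration sketch (illustrative, not part of the module): listing every
# visible device with its name and compute capability.
#
#   >>> for i in range(torch.cuda.device_count()):
#   ...     major, minor = torch.cuda.get_device_capability(i)
#   ...     print(f"cuda:{i} {torch.cuda.get_device_name(i)} sm_{major}{minor}")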

def get_arch_list() -> List[str]:
    r"""Return list CUDA architectures this library was compiled for."""
    if not is_available():
        return []
    arch_flags = torch._C._cuda_getArchFlags()
    if arch_flags is None:
        return []
    return arch_flags.split()


def get_gencode_flags() -> str:
    r"""Return NVCC gencode flags this library was compiled with."""
    arch_list = get_arch_list()
    if len(arch_list) == 0:
        return ""
    arch_list_ = [arch.split("_") for arch in arch_list]
    return " ".join(
        [
            f"-gencode compute=compute_{arch},code={kind}_{arch}"
            for (kind, arch) in arch_list_
        ]
    )


def current_device() -> int:
    r"""Return the index of a currently selected device."""
    _lazy_init()
    return torch._C._cuda_getDevice()

def synchronize(device: _device_t = None) -> None:
    r"""Wait for all kernels in all streams on a CUDA device to complete.

    Args:
        device (torch.device or int, optional): device for which to synchronize.
            It uses the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).
    """
    _lazy_init()
    with torch.cuda.device(device):
        return torch._C._cuda_synchronize()


def ipc_collect():
    r"""Force collects GPU memory after it has been released by CUDA IPC.

    .. note::
        Checks if any sent CUDA tensors could be cleaned from the memory. Force
        closes shared memory file used for reference counting if there is no
        active counters. Useful when the producer process stopped actively sending
        tensors and want to release unused memory.
    """
    _lazy_init()
    return torch._C._cuda_ipc_collect()

def current_stream(device: Optional[_device_t] = None) -> Stream:
    r"""Return the currently selected :class:`Stream` for a given device.

    Args:
        device (torch.device or int, optional): selected device. Returns
            the currently selected :class:`Stream` for the current device, given
            by :func:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).
    """
    _lazy_init()
    streamdata = torch._C._cuda_getCurrentStream(
        _get_device_index(device, optional=True)
    )
    return Stream(
        stream_id=streamdata[0], device_index=streamdata[1], device_type=streamdata[2]
    )


def default_stream(device: Optional[_device_t] = None) -> Stream:
    r"""Return the default :class:`Stream` for a given device.

    Args:
        device (torch.device or int, optional): selected device. Returns
            the default :class:`Stream` for the current device, given by
            :func:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).
    """
    _lazy_init()
    streamdata = torch._C._cuda_getDefaultStream(
        _get_device_index(device, optional=True)
    )
    return Stream(
        stream_id=streamdata[0], device_index=streamdata[1], device_type=streamdata[2]
    )

def current_blas_handle():
    r"""Return cublasHandle_t pointer to current cuBLAS handle"""
    _lazy_init()
    return torch._C._cuda_getCurrentBlasHandle()


def set_sync_debug_mode(debug_mode: Union[int, str]) -> None:
    r"""Set the debug mode for cuda synchronizing operations.

    Args:
        debug_mode(str or int): if "default" or 0, don't error or warn on synchronizing operations,
            if "warn" or 1, warn on synchronizing operations, if "error" or 2, error out synchronizing operations.

    Warning:
        This is an experimental feature, and not all synchronizing operations will trigger warning or error. In
        particular, operations in torch.distributed and torch.sparse namespaces are not covered yet.
    """
    _lazy_init()
    if isinstance(debug_mode, str):
        if debug_mode == "default":
            debug_mode = 0
        elif debug_mode == "warn":
            debug_mode = 1
        elif debug_mode == "error":
            debug_mode = 2
        else:
            raise RuntimeError(
                "invalid value of debug_mode, expected one of `default`, `warn`, `error`"
            )

    torch._C._cuda_set_sync_debug_mode(debug_mode)


def get_sync_debug_mode() -> int:
    r"""Return current value of debug mode for cuda synchronizing operations."""
    _lazy_init()
    return torch._C._cuda_get_sync_debug_mode()
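
# A short sketch (illustrative, not part of the module): flagging implicit
# device synchronizations while debugging a slow training step.
#
#   >>> torch.cuda.set_sync_debug_mode("warn")  # or 1
#   >>> x = torch.randn(4, device="cuda")
#   >>> x.item()  # forces a device-to-host sync, now reported with a warning
#   >>> torch.cuda.set_sync_debug_mode("default")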

def _get_pynvml_handler(device: Optional[Union[Device, int]] = None):
    if not _HAS_PYNVML:
        raise ModuleNotFoundError(
            "pynvml does not seem to be installed or it can't be imported."
        ) from _PYNVML_ERR
    from pynvml import NVMLError_DriverNotLoaded

    try:
        pynvml.nvmlInit()
    except NVMLError_DriverNotLoaded as e:
        raise RuntimeError("cuda driver can't be loaded, is cuda enabled?") from e

    device = _get_nvml_device_index(device)
    handle = pynvml.nvmlDeviceGetHandleByIndex(device)
    return handle


def _get_amdsmi_handler(device: Optional[Union[Device, int]] = None):
    if not _HAS_PYNVML:
        raise ModuleNotFoundError(
            "amdsmi does not seem to be installed or it can't be imported."
        ) from _PYNVML_ERR
    try:
        amdsmi.amdsmi_init()
    except amdsmi.AmdSmiException as e:
        raise RuntimeError(
            "amdsmi driver can't be loaded, requires >=ROCm5.6 installation"
        ) from e
    device = _get_amdsmi_device_index(device)
    handle = amdsmi.amdsmi_get_processor_handles()[device]
    return handle


def _get_amdsmi_device_index(device: Optional[Union[int, Device]]) -> int:
    r"""Return the amdsmi index of the device, taking visible_devices into account."""
    idx = _get_device_index(device, optional=True)
    visible_devices = _parse_visible_devices()
    if type(visible_devices[0]) is str:
        raise RuntimeError("HIP_VISIBLE_DEVICES should be indices and not strings")
    idx_map = dict(enumerate(cast(List[int], visible_devices)))
    if idx not in idx_map:
        raise RuntimeError(
            f"device {idx} is not visible (HIP_VISIBLE_DEVICES={visible_devices})"
        )
    return idx_map[idx]


def _get_amdsmi_memory_usage(device: Optional[Union[Device, int]] = None) -> int:
    handle = _get_amdsmi_handler()
    device = _get_amdsmi_device_index(device)
    return amdsmi.amdsmi_get_gpu_vram_usage(handle)["vram_used"]


def _get_amdsmi_utilization(device: Optional[Union[Device, int]] = None) -> int:
    handle = _get_amdsmi_handler()
    device = _get_amdsmi_device_index(device)
    handle = amdsmi.amdsmi_get_processor_handles()[device]
    return amdsmi.amdsmi_get_gpu_activity(handle)["gfx_activity"]


def _get_amdsmi_temperature(device: Optional[Union[Device, int]] = None) -> int:
    handle = _get_amdsmi_handler(device)
    return amdsmi.amdsmi_get_temp_metric(
        handle,
        amdsmi.AmdSmiTemperatureType.JUNCTION,
        amdsmi.AmdSmiTemperatureMetric.CURRENT,
    )


def _get_amdsmi_power_draw(device: Optional[Union[Device, int]] = None) -> int:
    handle = _get_amdsmi_handler(device)
    socket_power = amdsmi.amdsmi_get_power_info(handle)["average_socket_power"]
    if socket_power != "N/A":
        return socket_power
    return amdsmi.amdsmi_get_power_info(handle)["current_socket_power"]


def _get_amdsmi_clock_rate(device: Optional[Union[Device, int]] = None) -> int:
    handle = _get_amdsmi_handler(device)
    clock_info = amdsmi.amdsmi_get_clock_info(handle, amdsmi.AmdSmiClkType.GFX)
    if "cur_clk" in clock_info:  # ROCm 6.2 deprecation
        return clock_info["cur_clk"]
    return clock_info["clk"]

def memory_usage(device: Optional[Union[Device, int]] = None) -> int:
    r"""Return the percent of time over the past sample period during which global (device)
    memory was being read or written as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    if not torch.version.hip:
        handle = _get_pynvml_handler()
        device = _get_nvml_device_index(device)
        handle = pynvml.nvmlDeviceGetHandleByIndex(device)
        return pynvml.nvmlDeviceGetUtilizationRates(handle).memory
    else:
        return _get_amdsmi_memory_usage(device)

def utilization(device: Optional[Union[Device, int]] = None) -> int:
    r"""Return the percent of time over the past sample period during which one or
    more kernels was executing on the GPU as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    if not torch.version.hip:
        handle = _get_pynvml_handler(device)
        device = _get_nvml_device_index(device)
        handle = pynvml.nvmlDeviceGetHandleByIndex(device)
        return pynvml.nvmlDeviceGetUtilizationRates(handle).gpu
    else:
        return _get_amdsmi_utilization(device)

def temperature(device: Optional[Union[Device, int]] = None) -> int:
    r"""Return the average temperature of the GPU sensor in Degrees C (Centigrades).

    The average temperature is computed based on past sample period as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    if not torch.version.hip:
        handle = _get_pynvml_handler(device)
        # 0 refers to the temperature sensor for the GPU die.
        return pynvml.nvmlDeviceGetTemperature(handle, 0)
    else:
        return _get_amdsmi_temperature(device)

def power_draw(device: Optional[Union[Device, int]] = None) -> int:
    r"""Return the average power draw of the GPU sensor in mW (MilliWatts)
        over the past sample period as given by `nvidia-smi` for Fermi or newer fully supported devices.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    if not torch.version.hip:
        handle = _get_pynvml_handler(device)
        return pynvml.nvmlDeviceGetPowerUsage(handle)
    else:
        return _get_amdsmi_power_draw(device)

def clock_rate(device: Optional[Union[Device, int]] = None) -> int:
    r"""Return the clock speed of the GPU SM in MHz over the past sample period as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    if not torch.version.hip:
        handle = _get_pynvml_handler(device)
        return pynvml.nvmlDeviceGetClockInfo(handle, 1)
    else:
        return _get_amdsmi_clock_rate(device)
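
# A polling sketch (illustrative, not part of the module): sampling GPU
# telemetry while a job runs. Requires pynvml (or amdsmi on ROCm) to be
# importable; the one-second interval is an arbitrary example value.
#
#   >>> import time
#   >>> for _ in range(3):
#   ...     print(torch.cuda.utilization(0), torch.cuda.temperature(0))
#   ...     time.sleep(1.0)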

def _get_device(device: Union[int, str, torch.device]) -> torch.device:
    r"""Return the torch.device type object from the passed in device.

    Args:
        device (torch.device or int): selected device.
    """
    if isinstance(device, str):
        device = torch.device(device)
    elif isinstance(device, int):
        device = torch.device("cuda", device)
    return device


def _get_generator(device: torch.device) -> torch._C.Generator:
    r"""Return the CUDA Generator object for the given device.

    Args:
        device (torch.device): selected device.
    """
    idx = device.index
    if idx is None:
        idx = current_device()
    return torch.cuda.default_generators[idx]

def _set_rng_state_offset(
    offset: int, device: Union[int, str, torch.device] = "cuda"
) -> None:
    r"""Set the random number generator state offset of the specified GPU.

    Args:
        offset (int): The desired offset
        device (torch.device or int, optional): The device to set the RNG state.
            Default: ``'cuda'`` (i.e., ``torch.device('cuda')``, the current CUDA device).
    """
    final_device = _get_device(device)

    def cb():
        default_generator = _get_generator(final_device)
        default_generator.set_offset(offset)

    _lazy_call(cb)


def _get_rng_state_offset(device: Union[int, str, torch.device] = "cuda") -> int:
    r"""Return the random number generator state offset of the specified GPU.

    Args:
        device (torch.device or int, optional): The device to return the RNG state offset of.
            Default: ``'cuda'`` (i.e., ``torch.device('cuda')``, the current CUDA device).

    .. warning::
        This function eagerly initializes CUDA.
    """
    _lazy_init()
    final_device = _get_device(device)
    default_generator = _get_generator(final_device)
    return default_generator.get_offset()
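
# A reproducibility sketch (illustrative, not part of the module), using the
# public seeding helpers re-exported below via `from .random import *`.
#
#   >>> torch.cuda.manual_seed_all(1234)    # seed every visible GPU
#   >>> state = torch.cuda.get_rng_state()  # snapshot generator state
#   >>> torch.cuda.set_rng_state(state)     # ...and restore it later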

from .memory import *  # noqa: F403
from .random import *  # noqa: F403


################################################################################
# Define Storage and Tensor classes
################################################################################


@staticmethod  # type: ignore[misc]
def _lazy_new(cls, *args, **kwargs):
    _lazy_init()
    # We may need to call lazy init again if we are a forked child
    # del _CudaBase.__new__
    return super(_CudaBase, cls).__new__(cls, *args, **kwargs)


class _CudaBase:
    is_cuda = True
    is_sparse = False

    def type(self, *args, **kwargs):
        # We could use a Protocol here to tell mypy that self has `get_device` method
        # but it is only available in the typing module on Python >= 3.8
        # or on typing_extensions module on Python >= 3.6
        with device(self.get_device()):  # type: ignore[attr-defined]
            return super().type(*args, **kwargs)  # type: ignore[misc]

    __new__ = _lazy_new


from torch.storage import _LegacyStorage, _warn_typed_storage_removal


class _CudaLegacyStorage(_LegacyStorage):
    @classmethod
    def from_buffer(cls, *args, **kwargs):
        _warn_typed_storage_removal()
        raise RuntimeError("from_buffer: Not available for CUDA storage")

    @classmethod
    def _new_with_weak_ptr(cls, *args, **kwargs):
        raise RuntimeError("_new_with_weak_ptr: Not available for CUDA storage")

    @classmethod
    def _new_shared_filename(cls, manager, obj, size, *, device=None, dtype=None):
        raise RuntimeError("_new_shared_filename: Not available for CUDA storage")


class ByteStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.uint8


class DoubleStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.double


class FloatStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.float


class HalfStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.half


class LongStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.long


class IntStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.int


class ShortStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.short


class CharStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.int8


class BoolStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.bool


class BFloat16Storage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.bfloat16


class ComplexDoubleStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.cdouble


class ComplexFloatStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.cfloat


del _LegacyStorage
del _CudaLegacyStorage

torch._storage_classes.add(DoubleStorage)
torch._storage_classes.add(FloatStorage)
torch._storage_classes.add(LongStorage)
torch._storage_classes.add(IntStorage)
torch._storage_classes.add(ShortStorage)
torch._storage_classes.add(CharStorage)
torch._storage_classes.add(ByteStorage)
torch._storage_classes.add(HalfStorage)
torch._storage_classes.add(BoolStorage)
torch._storage_classes.add(BFloat16Storage)
torch._storage_classes.add(ComplexDoubleStorage)
torch._storage_classes.add(ComplexFloatStorage)

class _WrappedTritonKernel:
    """Just a simple wrapper to store some metadata for testing purposes."""

    def __init__(self, kernel):
        self.kernel = kernel
        self.kernel_invoked = False

    def __call__(self, *args, **kwargs):
        res = self.kernel(*args, **kwargs)
        self.kernel_invoked = True
        return res


def _register_triton_kernels():
    if torch._running_with_deploy():
        return

    @_WrappedTritonKernel
    def kernel_impl(*args, **kwargs):
        from torch.sparse._triton_ops import bsr_dense_mm

        return bsr_dense_mm(*args, skip_checks=True, **kwargs)

    @_WrappedTritonKernel
    def addmm_kernel_impl(*args, **kwargs):
        from torch.sparse._triton_ops import bsr_dense_addmm

        return bsr_dense_addmm(*args, skip_checks=True, **kwargs)

    has_triton = importlib.util.find_spec("triton") is not None
    if has_triton:
        torch._TritonLibrary.registerOp(
            "_triton_bsr_dense_mm_out",
            "_triton_bsr_dense_mm_out(Tensor bsr, Tensor dense, *, Tensor(a!) out) -> Tensor(a!)",
            kernel_impl,
            "SparseCsrCUDA",
        )

        torch._TritonLibrary.registerOp(
            "_triton_bsr_dense_addmm_out",
            (
                "_triton_bsr_dense_addmm_out(Tensor input, Tensor bsr, Tensor dense,"
                " *, Scalar beta, Scalar alpha, Tensor(a!) out) -> Tensor(a!)"
            ),
            addmm_kernel_impl,
            "SparseCsrCUDA",
        )


_lazy_call(_register_triton_kernels)

from . import amp, jiterator, nvtx, profiler, sparse, tunable

__all__ = [
    # Typed storage and tensors
    "BFloat16Storage",
    "BFloat16Tensor",
    "BoolStorage",
    "BoolTensor",
    "ByteStorage",
    "ByteTensor",
    "CharStorage",
    "CharTensor",
    "ComplexDoubleStorage",
    "ComplexFloatStorage",
    "DoubleStorage",
    "DoubleTensor",
    "FloatStorage",
    "FloatTensor",
    "HalfStorage",
    "HalfTensor",
    "IntStorage",
    "IntTensor",
    "LongStorage",
    "LongTensor",
    "ShortStorage",
    "ShortTensor",
    "CUDAGraph",
    "CudaError",
    "DeferredCudaCallError",
    "Event",
    "ExternalStream",
    "OutOfMemoryError",
    "Stream",
    "StreamContext",
    "amp",
    "caching_allocator_alloc",
    "caching_allocator_delete",
    "can_device_access_peer",
    "check_error",
    "cudaStatus",
    "cudart",
    "current_blas_handle",
    "current_device",
    "current_stream",
    "default_generators",
    "default_stream",
    "device",
    "device_count",
    "device_of",
    "empty_cache",
    "get_allocator_backend",
    "CUDAPluggableAllocator",
    "change_current_allocator",
    "get_arch_list",
    "get_device_capability",
    "get_device_name",
    "get_device_properties",
    "get_gencode_flags",
    "get_rng_state",
    "get_rng_state_all",
    "get_sync_debug_mode",
    "graph",
    "graph_pool_handle",
    "graphs",
    "has_half",
    "has_magma",
    "init",
    "initial_seed",
    "ipc_collect",
    "is_available",
    "is_bf16_supported",
    "is_current_stream_capturing",
    "is_initialized",
    "jiterator",
    "list_gpu_processes",
    "make_graphed_callables",
    "manual_seed",
    "manual_seed_all",
    "max_memory_allocated",
    "max_memory_cached",
    "max_memory_reserved",
    "mem_get_info",
    "memory",
    "memory_allocated",
    "memory_cached",
    "memory_reserved",
    "memory_snapshot",
    "memory_stats",
    "memory_stats_as_nested_dict",
    "memory_summary",
    "memory_usage",
    "MemPool",
    "MemPoolContext",
    "use_mem_pool",
    "temperature",
    "power_draw",
    "clock_rate",
    "nccl",
    "nvtx",
    "profiler",
    "random",
    "reset_accumulated_memory_stats",
    "reset_max_memory_allocated",
    "reset_max_memory_cached",
    "reset_peak_memory_stats",
    "seed",
    "seed_all",
    "set_device",
    "set_per_process_memory_fraction",
    "set_rng_state",
    "set_rng_state_all",
    "set_stream",
    "set_sync_debug_mode",
    "sparse",
    "stream",
    "streams",
    "synchronize",
    "tunable",
    "utilization",
]