
    Ǆg              
       >   U d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlmZ d dlmZ ddgZ e       s:d dlZ G d	 d
      Zd Zeej.                  d   _        eej.                  d   _        yd dlmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%  e jL                  e'      Z(er	 d dl)m*Z*  G d dejZ                        Z. e.       Z/e.e0d<   dde1fdZ2 G d d      Zddde1dee3df   de
ee1df      defdZy# e+$ r e(jY                  d       Y gw xY w)    N)reduce)chain)DictListOptionalTupleTYPE_CHECKINGUnion)is_available)not_noneinit_device_mesh
DeviceMeshc                       e Zd Zy)_DeviceMeshStubN)__name__
__module____qualname__     e/home/mcse/projects/flask_80/flask-venv/lib/python3.12/site-packages/torch/distributed/device_mesh.pyr   r      s    r   r   c                       y Nr   r   r   r   _init_device_mesh_stubr      s    r   ztorch.distributed.device_mesh)_find_pg_by_ranks_and_tag_get_default_group_get_group_tagget_backendget_process_group_ranksget_rankget_world_sizeinit_process_groupis_initialized	new_groupProcessGroup)	ArrayLikezCDeviceMesh requires numpy >= 1.21 to be installed for type checkingc            	       >   e Zd ZddZddZdddeedf   d	eeedf      ddfd
Z		 dddde
e   ddfdZddZddde
e   fdZededefd       Zededefd       ZdddedefdZ	 ddedede
ej&                     ddfdZdeeedf      fdZdddeded   fdZy)_MeshEnvreturnNc                 J    g | _         i | _        i | _        i | _        i | _        y r   )
mesh_stackchild_to_root_mappingmesh_dim_group_optionsroot_to_flatten_mappingflatten_name_to_root_dimsselfs    r   __init__z_MeshEnv.__init__A   s7    02DOGID&  ' UWD(  *r   r   c                 f    t        | j                        dk(  rt        d      | j                  d   S )Nr   z#No device mesh is currently active!)lenr*   RuntimeErrorr/   s    r   get_current_meshz_MeshEnv.get_current_meshM   s.    4??#q("#HII??2&&r   device_meshsubmesh_dim_names.submesh_dimsc                    |D cg c]=  }t        |      dkD  rt        fd|      nj                  j                  |d         ? }}j                  }g }g }d}	t	        ||      D ]  \  }
}t        |
      dkD  rv|j                  |
d   |	z
  |
d   |	z
        }|j                  |
d   |	z
         |	t        |
      dz
  z  }	|j                  | j                     |   j                  d          |j                  |
d   |	z
         |j                  j                  |
d              t        t        |j                              }|D ]  }|j                  |          |j                  g || j                  dg| }j                         }|D ]"  }t!        j"                  ||d      }||v s!|}$ |_        | j$                  |<   |S c c}w )N   c                 r    j                   j                  |       j                   j                  |      z  S r   )meshsize)xyr7   s     r   <lambda>z*_MeshEnv.create_sub_mesh.<locals>.<lambda>^   s/    !1!1!6!6q!9K<L<L<Q<QRS<T!T r   r   r3   )	start_dimend_dimFmesh_dim_names_init_backend)r4   r   r=   r>   zipflattenappendr-   _dim_group_infoslistrangendimremovepermutereshaper   r   device_typer+   )r0   r7   r8   r9   mesh_dimslice_dim_sizemesh_tensorslice_dim_idxslice_dim_group_infonum_dims_flattenmesh_dim_indicesmesh_dim_namemesh_dims_remained_idxidxpg_ranks_by_dimcur_rankmesh_ndsubmeshres_submeshs    `                 r   create_sub_meshz_MeshEnv.create_sub_meshR   s\   " !-  x=1$	 T
 !%%**8A;78N  &**KM#%   !36|EV3W / - '(1,"-"5"5"21"58H"H 0 47G G #6 #K "(()9!)<?O)OP$,<(=(AA$(//44[A)**1. "(()9!)<?O)OP(//#445Ea5HI+4 &*%0@0@*A%B"$ 3&--c231k11 '*7gb+)+O #++-H* *$++#4"'	 w&")K* ,@K(6AD&&{3Is   AGrY   c                    t         j                  |      }t        |j                        D cg c]&  }t        |j                        j	                  |      ( }}|s5dj                  |D cg c]  }t        |j                        |    c}      }| j                  j                  |i        t        g t        t        |j                              | j                  |   j                          }||v rt        | d| dd| d      || j                  v r#|| j                  |   v r| j                  |   |   S t        j                  |j                  j!                               }t        t#        |j                  j$                              }	|D ]  }
|	j'                  |
         |j                  j(                  g |	| j+                  d|      }|j-                         }|D ]"  }t/        |j0                  ||f      }||v s!|}$ || j2                  <   || j                  j                  |i       |<   t5        |      | j                  |   |<   |S c c}w c c}w )N_z# already exists for submesh of the . z5The mesh_dim_names of submesh and flattened mesh are z-. Please specify another valid mesh_dim_name.r3   rE   )_mesh_resourcesget_root_meshr   rE   indexjoinr.   
setdefaultr   rK   keysr5   r-   mathprodr=   r>   rL   rM   rN   rO   rP   r   r   rQ   r+   tuple)r0   r7   rY   	root_meshflattened_mesh_dim_nameflatten_dims_in_rootdiminvalid_dim_namesflattened_mesh_dim_sizeremained_dims_in_rootflatten_dim_in_rootr\   r]   r^   flattened_meshres_flattened_meshs                   r   create_flatten_meshz_MeshEnv.create_flatten_mesh   s    (55kBI 088R8R/S$+ 112889PQ$  $
 ! # $8 !!9!9:3?! **55iD % !hy7789!//	:??A!  11"$o%HSUVKL]K^ _B C  T999!T%A%A)%LL33I>}MM&*ii0@0@0E0E0G&H#$(y~~/B/B)C$D!'; B#%,,-@AB 5inn44 &)=gb12  !))+H* 8!+))$1#3"
 w&)7&8 >GD&&'9:TfD((33IrB=QGLMaGbD**95mD%%w$s   +I+Ic                 F    | j                   j                  |d       }|s|S |S r   )r+   get)r0   r7   ro   s      r   rg   z_MeshEnv.get_root_mesh   s)     2266{DII&/;>Y>r   c                     | j                  |      }|j                  }|r.|r,t        |      dk(  sJ d       |d   }| j                  ||      S y)z
            Returns the index of the mesh dim in the root mesh.
            The device_mesh passed in needs to be sliced out from the root mesh
            or submesh of the root mesh.
            r;   z"The submesh can only be a 1D mesh.r   N)rg   rE   r4   get_mesh_dim_by_name)r0   r7   ro   child_mesh_dim_nameschild_mesh_dim_names        r   get_root_mesh_dimz_MeshEnv.get_root_mesh_dim   sf     **;7I#.#=#= 1,-28782&:1&=#00<OPPr   rQ   c                 4    t        |       j                         S r   )_get_device_handledevice_countrQ   s    r   num_devices_per_hostz_MeshEnv.num_devices_per_host   s    %k2??AAr   c                 B    t               t        j                  |       z  S r   )r    r'   r   r   s    r   	num_hostsz_MeshEnv.num_hosts   s     "#x'D'D['QQQr   c                     |j                   t        |j                         dk(  rt        d      ||j                   vrt        d| dd|j                          t        |j                   j	                  |            S )Nr   zNo `mesh_dim_names` found.zMesh dimension 'z' does not exist.z.Available mesh dimensions are: mesh_dim_names=)rE   r4   KeyErrorr   rh   )r0   r7   rY   s      r   r}   z_MeshEnv.get_mesh_dim_by_name  s     **2{112a70  K$>$>>&}o5FGD[E_E_D`a  K66<<]KLLr   rr   backend
pg_optionsc                 &    ||f| j                   |<   y r   )r,   )r0   rr   r   r   s       r   _set_mesh_dim_group_optionsz$_MeshEnv._set_mesh_dim_group_options  s     18/DD'',r   c                   	 || j                  |      k7  rt        d      | j                  j                  |i        | j                  |   }g |j                  |	t        	fd|D              st        d| d	 d      d}g }|D ]k  }||v r||   }|d   }|j                  |       n-|j                  j                  |      }|j                  |f       ||k  rt        d| dd| d	d
      |}m |S )z
            Validate whether the mesh_dim_names is valid for slicing the given device_mesh.
            If valid, return dim indexes of the slice mesh in the device mesh.
            z'Cannot create a submesh from a submesh.c              3   &   K   | ]  }|v  
 y wr   r   ).0rY   valid_mesh_dim_namess     r   	<genexpr>z0_MeshEnv._get_slice_mesh_dims.<locals>.<genexpr>1  s      ! !55s   zInvalid mesh_dim_names z% specified. Valid mesh_dim_names are .r3   z specified. z!Found mesh dim indices to slice: rd   z.Mesh dim indices should be in ascending order.)	rg   r5   r.   rj   rE   allr   rI   rh   )
r0   r7   rE   r.   curr_idxslice_mesh_dimsrY   mesh_indicesnext_idxr   s
            @r   _get_slice_mesh_dimsz_MeshEnv._get_slice_mesh_dims  s[    d00=="#LMM **55k2F(,(F(F{(S%$++$*$ 
  %3  -n-= >00D/EQH  H O!/ $ $==#<]#KL  ,B/H#**<8*99??NH#**H;7x'"1.1AN;O;LBOH 
 $!$$ #"r   c                 r   | j                  ||      }|j                  j                  d|      j                  d|j                  j	                  |            }|j                         }g }|D ]H  }t        |j                  ||fd      }||v r|j                  |   gng |_        |j                  |       J |S )z`
            Return all the submeshes of a given mesh dimension of the device mesh.
            r3   FrD   )
r}   r=   swapdimsrP   r>   r   r   rQ   rJ   rI   )	r0   r7   rY   rR   r\   r]   res_submeshesmesh_1dr_   s	            r   _get_all_submeshesz_MeshEnv._get_all_submeshesR  s     00mLH)..77HEMMK$$))(3O #++-HM* .$++$1#3"'	  7* !11(;< (
 $$W-. ! r   r(   Nr(   r   r   )r7   r   r(   r   )r   r   r   r1   r6   r   strr   intra   r   ry   rg   r   staticmethodr   r   r}   r$   Optionsr   r   r   r   r   r   r'   r'   @   s   
		'
N	%N	  %S#XN	 uS#X/	N	
 N	b MQ@	&+@	&<DSM@	&@	&D	?	 	(3- 	  
	Bc 	Bc 	B 
	B 
	R3 	R3 	R 
	R
	M+	M<?	M	M* :>		E	E 	E !!5!56		E
 	E2	#%S/"2	#h	!+	!<?	!,	!r   r'   rf   rQ   c                 $    t        t        | d      S )a:  
        Get the module corresponding to the device_type which is cuda or cuda-like device.
        For example, when the device_type is cuda, the module `torch.cuda` is returned.
        Return None when there is no corresponding module for device_type, otherwise
        return the corresponding module.
        N)getattrtorchr   s    r   r   r   q  s     uk400r   c                   \   e Zd ZU dZeed<   ej                  ed<   ee	edf      ed<   ddddede
ej                  d	f   dee	edf      d
eddf
dZd Zd Zd$dZd%dZdefdZd ZdedefdZde
ee	edf   f   dd fdZd&dee
eef      defdZdee   fdZe	 d&ddde
eee   f   dedee
ej                  d	f      dee	edf      dd f
d       Zd&dee   defdZedefd       Zede	edf   fd       Z defdZ!d&dee
eef      defd Z"deee      fd!Z#d&d"ee   dd fd#Z$y)'r   a  
        DeviceMesh represents a mesh of devices, where layout of devices could be
        represented as a n-d dimension array, and each value of the n-d dimensional
        array is the global id of the default process group ranks.

        DeviceMesh could be used to describe the layout of devices across the cluster,
        and serves as a proxy for communication among the device lists within the cluster.

        DeviceMesh can be used as a context manager.

        .. note::
            DeviceMesh follows SPMD programming model, which means the same PyTorch Python program
            is running on all processes/ranks in the cluster. Therefore, users need to make sure the
            `mesh` array (which describes the layout of devices) should be identical across all ranks.
            Inconsistent `mesh` will lead to silent hang.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
            mesh (ndarray): A multi-dimensional array or an integer tensor describing the layout
                of devices, where the IDs are global IDs of the default process group.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        The following program runs on each process/rank in an SPMD manner. In this example, we have 2
        hosts with 4 GPUs each.
        A reduction over the first dimension of mesh will reduce across
        columns (0, 4), .. and (3, 7), a reduction over the second dimension
        of mesh reduces across rows (0, 1, 2, 3) and (4, 5, 6, 7).

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import DeviceMesh
            >>>
            >>> # Initialize device mesh as (2, 4) to represent the topology
            >>> # of cross-host(dim 0), and within-host (dim 1).
            >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
        rQ   r=   .rE   NTrD   r%   rF   r(   c                   || _         t        |t        j                        r'|j                  j
                  dk7  rt        d|       t        |t        j                        r.|j                         j                  t        j                        n%t        j                  |dt        j                        | _        |rt        |      nd | _        t        | j                  j                         j                               | _        d | _        |dk7  r|r | j%                          | j'                          t)               r&t+               dk(  rt-        j.                         | _        | j                  t1               k(  j3                         }|j5                  d      dv sJ |j5                  d      dkD  r|d   j                         nd | _        y y )	Ncpuz!`mesh` must be a CPU tensor, got dtypedevicer   xlathreadedr   )r   r;   )rQ   
isinstancer   Tensorr   type
ValueErrordetachtor   tensorr=   rn   rE   rH   tolist_flatten_mesh_list
_thread_id_get_or_create_default_group_init_process_groupsr"   r   	threading	get_identr   nonzeror>   _coordinate_on_dim)r0   rQ   r=   rE   rF   rank_coordss         r   r1   zDeviceMesh.__init__  sq     +D$-$++2B2Be2K #DTF!KLL dELL1   uyy 1\\$uEIIF I
 <J%"7tD ',DII,=,=,?,F,F,H&ID#"DO e# !557--/!#(C&/&9&9&;DO  $yyHJ6??A"''*f444/:/?/?/BQ/FKN))+D ' $r   c           	         t               }|s
t                t               }| j                  j	                         |kD  r*t        d| d| j                  j	                          d      t        | j                        }|sZ|rX|j                         }||kD  r'||z  dk7  rt        d| d| d| j                   d      |j                  t               |z         t               S )	Nz2Mesh should not be bigger than default world size z, but found z ranks!r   z8DeviceMesh only support homogeneous hardware, but found z ranks and  z	 devices!)r"   r!   r    r=   numelr5   r   rQ   r   
set_devicer   r   )r0   default_initialized
world_sizedevice_handler   s        r   r   z'DeviceMesh._get_or_create_default_group  s    "0"2&"$')Jyy :-"HT`aeajajapapar`ssz{  /t/?/?@M&= (5'A'A'C$!55"%99Q>&R%,k2F1GqIYIYHZZce  ((6J)JK%''r   c           	         g }| j                   j                  dk(  r| j                   j                         t               k(  rt	               }t        t        t                           }t        j                  j                         rt        |      dk(  rt        d|      n|}|j                  t        |      ||j                  f       || _        y t        | j                   j                        D ]  }| j                   j                  d|      j!                  d| j                   j#                  |            }|D ]  }|j%                         }|t&        j(                  v rt&        j(                  |   \  }	}
nd\  }	}
t        ||	|
      }| j+                         |v sbt-        |      |kD  r t/        d| j+                          d	| d
      |j                  t        t1        |            ||j                  f         || _        y )Nr;   gloozcpu:gloo,cuda:nccl)r   ranksr3   )NN)r   r   r   zFEach device mesh dimension should get only one process group, but got z in !)r=   rM   r   r    r   rK   rL   r   cudar   r   r#   rI   r   
group_namer   rP   r>   r   rf   r,   r   r4   r5   r   rJ   )r0   dim_group_infosdefault_groupr   	dim_grouprr   r\   dim_meshsubgroup_ranksr   r   s              r   r   zDeviceMesh._init_process_groups  s    ACOyy~~"tyy'8N<L'L !3 4U>#345 zz..0#M2f< &:%H '	   &&&y1!,,j %4D!Y !0 +C '+ii&8&8S&A&I&IDIINN3/'O
 %4 #)1): /"H"HH !0 F Fs K ' * 3=/GZ
 %."0$+'1%	  ==?n<"?3c9&2&lmqmzmzm|l} ~**8)9%<'" !" ,22$28I3F$G$2$-$8$8!";#+X %4D!r   c                 D    t         j                  j                  |        | S r   )rf   r*   rI   r/   s    r   	__enter__zDeviceMesh.__enter__8  s    &&--d3Kr   c                 @    t         j                  j                          y r   )rf   r*   pop)r0   exc_type	exc_valueexc_tracebacks       r   __exit__zDeviceMesh.__exit__>  s    &&**,r   c                     | j                   s-d| j                   d| j                  j                          d}|S d| j                   d| j                  j                          d| j                    d}|S )NzDeviceMesh('z', )z, mesh_dim_names=)rE   rQ   r=   r   )r0   device_mesh_reprs     r   __repr__zDeviceMesh.__repr__B  s     ** t//0DII4D4D4F3GqI 
 $# $D$4$4#5S9I9I9K8LL]^b^q^q]rrst 
 $#r   c                     t        | dd       | _        | j                  sQt        | j                  | j                  j
                  | j                  | j                  | j                  f      | _        | j                  S )N_hash)	r   r   hashr   r=   shaperQ   rE   r   r/   s    r   __hash__zDeviceMesh.__hash__J  sc     w5DJ::!//		((++
 ::r   otherc                    t        |t              syt        |       t        |      k(  ry| j                  |j                  k(  xr~ | j                  j
                  |j                  j
                  k(  xrO | j                  |j                  k(  xr4 | j                  |j                  k(  xr | j                  |j                  k(  S )NFT)	r   r   idr   r=   r   rQ   rE   r   )r0   r   s     r   __eq__zDeviceMesh.__eq__Y  s    eZ0$x2e9$ ++u/G/GG <		5::+;+;;<((E,=,==< ++u/C/CC< 5+;+;;r   c                     | j                   st        d      t        |t              r|fn|}|| j                   k(  r| S t        j                  | |      }t        j                  | ||      }|S )aU
  
            Slice the current DeviceMesh based on the mesh_dim_names given to create a submesh.
            The submesh created consists of the dimensions and the communicators indicated by
            ``mesh_dim_names``

            Args:
                mesh_dim_names (Union[str, Tuple[str]]): the name or the tuple of names of the
                mesh dimension of the DeviceMesh to create the submesh for.
            Returns:
                A :class:`DeviceMesh` object

            The following program runs on each process/rank in an SPMD manner in a world size of 8.
            In the first example:
                Calling mesh_2d["tp"] on rank 0, 1, 2, 3 returns a 1D submesh of DeviceMesh:([0, 1, 2, 3]).
                Calling mesh_2d["tp"] on rank 4, 5, 6, 7 returns a 1D submesh of  DeviceMesh:([4, 5, 6, 7]).
                Calling mesh_2d["dp"] on rank 0, 4 returns a 1D submesh of  DeviceMesh:([0, 4]).
                Calling mesh_2d["dp"] on rank 1, 5 returns a 1D submesh of  DeviceMesh:([1, 5]).
                Calling mesh_2d["dp"] on rank 2, 6 returns a 1D submesh of  DeviceMesh:([2, 6]).
                Calling mesh_2d["dp"] on rank 3, 7 returns a 1D submesh of  DeviceMesh:([3, 7]).

            In the second example:
                Calling mesh_3d["dp", "cp"] on rank 0, 1, 4, 5 returns a 2D submesh of DeviceMesh:([[0, 1], [4, 5]]).
                Calling mesh_3d["dp", "cp"] on rank 2, 3, 6, 7 returns a 2D submesh of DeviceMesh:([[2, 3], [6, 7]]).
                Calling mesh_3d["cp", "dp"] on rank 0, 1, 4, 5 returns a 2D submesh of DeviceMesh:([[0, 4], [1, 5]]).
                Calling mesh_3d["cp", "dp"] on rank 2, 3, 6, 7 returns a 2D submesh of DeviceMesh:([[2, 6], [3, 7]]).

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize a 2D device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh_2d = init_device_mesh(device_type="cuda", (2,4), mesh_dim_names=("dp", "tp"))
                >>> tp_mesh = mesh_2d["tp"]
                >>> dp_mesh = mesh_2d["dp"]
                >>>
                >>> # Initialize a 3D mesh.
                >>> mesh_3d = init_device_mesh(device_type="cuda", (2,2,2), mesh_dim_names=("dp", "pp", "cp"))
                >>> # The order of the mesh_dim_names provided deteremines the order of dimensions in the submesh.
                >>> dp_cp_mesh = mesh_3d["dp", "cp"]
                >>> cp_dp_mesh = mesh_3d["cp", "dp"]
            z1Cannot slice a DeviceMesh without mesh_dim_names!)rE   r5   r   r   rf   r   ra   )r0   rE   r   r_   s       r   __getitem__zDeviceMesh.__getitem__g  s|    Z &&"#VWW &0%D!.  !4!44"1"F"F.# *99./ r   rR   c                    t        | d      st        d      | j                  j                  dkD  r'|%t        d| j                  j                   ddd      | j                  j                  dk(  r#|!t	        t        | j                  d	   dd
        S t        j                  |       }t        j                  j                  |d      }|r8||j                         v r&||   j                  d	   dd
 }t	        t        |       S t        |t              rt        j                  | |      n|}t	        t        | j                  |   dd
        S )a  
            Returns the single ProcessGroup specified by mesh_dim, or, if mesh_dim is not specified and the
            DeviceMesh is 1-dimensional, returns the only ProcessGroup in the mesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                A :class:`ProcessGroup` object.
            rJ   z*DeviceMesh process groups not initialized!r;   NFound the DeviceMesh have  dimensionsJOptional kwarg `mesh_dim` needs to be specified when device_mesh.ndim > 1.zmIf you want to get the list of all the ProcessGroups in the DeviceMesh,please use `get_all_groups()` instead.r      )hasattrr5   r=   rM   r   r   rJ   rf   rg   r-   r{   rk   r   r   r}   )r0   rR   ro   r-   r   s        r   	get_groupzDeviceMesh.get_group  s\    4!34"#OPPyy~~!h&6"00@L`=  yy~~"x'7-t/D/DQ/G/KL  (55d;I&5&M&M&Q&Q4'# '87N7S7S7U+U"9("C"T"TUV"WXZYZ"[ 9? KLL "(C0 $88xH! 
  -t/D/DX/NrPQ/RS r   c                     t        | j                  j                        D cg c]  }| j                  |       c}S c c}w )z
            Returns a list of ProcessGroups for all mesh dimensions.

            Returns:
                A list of :class:`ProcessGroup` object.
            )rL   r=   rM   r   )r0   is     r   get_all_groupszDeviceMesh.get_all_groups  s.     05TYY^^/DE!DNN1%EEEs   =re   groupc                   t        | t              rt        |       }t        |t        j                        r|j                         |k7  s|||k7  rt        dt        |       d|       t        j                  |dt        j                        }t        |||d      }t        |       || j                  fg|_        |S t        |       }t        |      dk(  rt        d      |t        d	      t        |t        j                        r/|j!                         j#                  t        j                  d
      n%t        j                  |dt        j                        }|j$                  t        |      k7  r)t        d|j                          dt        |       d      t        |||d      }|D  cg c]#  } t        |       t        |       | j                  f% c} |_        |S c c} w )aM  
            Constructs a :class:`DeviceMesh` with ``device_type`` from an
            existing :class:`ProcessGroup`.

            The constructed device mesh has number of dimensions equal to the
            number of groups passed. If more than one group is passed, then the
            ``mesh`` argument is required.
            zInvalid mesh z for ProcessGroup with ranks r   r   FrD   r   z.Expects at least one ProcessGroup to be passedz0Must pass mesh if passing multiple ProcessGroups)r   r   zEExpects mesh with ndim equal to number of ProcessGroups but got mesh z and z ProcessGroups)r   r$   r   r   r   r   r   r   r   r   r   r   r   rJ   rK   r4   r   r   rM   )r   rQ   r=   rE   group_ranksr7   groupss          r   
from_groupzDeviceMesh.from_group  s     %.5e<tU\\2t{{}7S&4;+>$'D	{2OP[}]  ||KUYYO(#1"'	 $E*K9I9IJ0, #"%[F6{a !QRR| !STT dELL1   uyy ?\\$uEIIF 
 yyCK'  KKM?%F}NL  %T.PUK $, 	 #5)+E2$$,K( ,s   3(G%c                 p    || j                   j                         S | j                   j                  |      S r   )r=   r   r>   )r0   rR   s     r   r>   zDeviceMesh.size  s*    (0(8499??$VdiinnX>VVr   c                 .    | j                   j                  S r   )r=   rM   r/   s    r   rM   zDeviceMesh.ndim  s    99>>!r   c                 @    t        | j                  j                        S r   )rn   r=   r   r/   s    r   r   zDeviceMesh.shape#  s    ))r   c                     t               S )z:
            Returns the current global rank.
            )r   r/   s    r   r   zDeviceMesh.get_rank'  s     :r   c                     | j                   dkD  r&|$t        d| j                  j                    dd      |d}t        | j	                  |            }t        |t              sJ d       t        t        |            S )a{  
            Returns the local rank of the given mesh_dim of the DeviceMesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                An integer denotes the local rank.

            The following program runs on each process/rank in an SPMD manner. In this example, we have 2
            hosts with 4 GPUs each.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 0, 1, 2, 3 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 4, 5, 6, 7 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 0, 4 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 1, 5 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 2, 6 would return 2.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 3, 7 would return 3.

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
            r;   r   r   r   r   z1We expect ProcessGroup before calling `get_rank`!)rM   r5   r=   r   r   r   r$   r   )r0   rR   mesh_dim_groups      r   get_local_rankzDeviceMesh.get_local_rank-  s    8 yy1}!1"00@L`  !%dnnX&>?N CBC  H^455r   c                 6    | j                   r| j                   S dS )z
            Return the relative indices of this rank relative to all
            dimensions of the mesh. If this rank is not part of the mesh, return None.
            N)r   r/   s    r   get_coordinatezDeviceMesh.get_coordinateW  s    
 /3.E.E4**O4Or   rY   c                 \    | j                   st        d      t        j                  | |      S )a\  
            Returns a 1D DeviceMesh by flattening the current DeviceMesh.

            If no mesh_dim_name is provided, the default is a string concatentaing the mesh_dim_names of the
            given submesh with each mesh_dim_name separated by "_". For example, if we have a 3D mesh
            DeviceMesh([[[0, 1], [2, 3]], [[4, 5], [6, 7]]], mesh_dim_names=("dp", "cp", "tp")), calling
            mesh_3d["dp", "cp"]._flatten() will create a 1D submesh DeviceMesh([0, 1, 2, 3], mesh_dim_names=("dp_cp",))
            on rank 0, 1, 2, 3 and a 1D submesh DeviceMesh([4, 5, 6, 7], mesh_dim_names=("dp_cp",)) on rank 4, 5, 6, 7.

            After the flattened dimension is created, to access the flattened dimesnion in mesh_3d, one can use the
            existing slicing method to obtain the flattened mesh through calling mesh_3d["dp_cp"].
            z3Cannot flatten a DeviceMesh without mesh_dim_names!)rE   r5   rf   ry   )r0   rY   s     r   _flattenzDeviceMesh._flatten^  s2     &&"I  #66t]KKr   r   r   r   )%r   r   r   __doc__r   __annotations__r   r   r   r   r
   boolr1   r   r   r   r   r   r   objectr   r   r   r$   r   r   r   r   r   r>   propertyrM   r   r   r   r  r  r   r   r   r   r   z  sV   %	N ll sCx11 9="&(	(	 k12(	
 %U38_5(	  (	 (	T	(:I	4V		-	$c 	$		 	4 	=	"'U38_(<"==	=	~,	huS#X&? ,	< ,	\	FD$6 	F 
 @D<	
 9=<	tL'99:<	<	 5{!:;<<	
 %U38_5<	 <	 
<	|	W# 	W# 	W 
	"# 	" 
	" 
	*5c? 	* 
	*	c 	(	68E#s(O+D (	6PS (	6T	PHT#Y$7 	P	L(3- 	L< 	Lr   re   
mesh_shape.rE   r(   c          	      "   |kt        t        |            t        |      k7  rt        dd|       t        |      t        |      k7  r%t        ddt        |       dt        |       d      | r | j                         st        d|  d	d
      t	        j
                  d      5  t	        j                  t        j                  |      t        j                        j                  |      }ddd       t        | |      }|S # 1 sw Y   xY w)a  
        Initializes a `DeviceMesh` based on `device_type`, `mesh_shape`, and `mesh_dim_names` parameters.

        This creates a DeviceMesh with an n-dimensional array layout, where `n` is the length of `mesh_shape`.
        If `mesh_dim_names` is provided, each dimension is labeled as `mesh_dim_names[i]`.

        .. note::
            `init_device_mesh` follows SPMD programming model, meaning the same PyTorch Python program
            runs on all processes/ranks in the cluster. Ensure `mesh_shape` (the dimensions of the nD array
            describing device layout) is identical across all ranks. Inconsistent `mesh_shape` may lead to hanging.

        .. note::
            If no process group is found, init_device_mesh will initialize distributed process group/groups
            required for distributed communications behind the scene.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
                Passing in a device type with a GPU index, such as "cuda:0", is not allowed.
            mesh_shape (Tuple[int]): A tuple defining the dimensions of the multi-dimensional array
                describing the layout of devices.
            mesh_dim_names (Tuple[str], optional): A tuple of mesh dimension names to assign to each dimension
                of the multi-dimensional array describing the layout of devices. Its length must match the length
                of `mesh_shape`. Each string in `mesh_dim_names` must be unique.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import init_device_mesh
            >>>
            >>> mesh_1d = init_device_mesh("cuda", mesh_shape=(8,))
            >>> mesh_2d = init_device_mesh("cuda", mesh_shape=(2, 8), mesh_dim_names=("dp", "tp"))

        Nz"Each mesh_dim_name must be unique.z/Found repeated mesh_dim_name in mesh_dim_names z6mesh_shape and mesh_dim_names should have same length!zFound len(mesh_dim_names): z and len(mesh_shape):r   z4Device type with GPU index is not supported but got rd   zUIf you maintained a 'torch.device' object, it's recommended to pass in 'device.type'.r   r   )rQ   r=   rE   )r4   setr5   isalphar   r   arangerl   rm   r   viewr   )rQ   r
  rE   r=   r7   s        r   r   r   r  s   R %3~&'3~+>>"8EnEUV 
 :#n"55"L1#n2E1FF[\_`j\k[llmn  {224F{mSUVg  \\%  	Y<<		* 5UYYGLLZXD	Y #)
 	Y 	Ys   %ADD)r   )4loggingrl   r   	functoolsr   	itertoolsr   typingr   r   r   r   r	   r
   r   torch.distributedr   torch.utils._typing_utilsr   __all__sysr   r   modulesr   r   "torch.distributed.distributed_c10dr   r   r   r   r   r   r    r!   r"   r#   r$   	getLoggerr   loggernumpy.typingr%   ImportErrorwarninglocalr'   rf   r  r   r   r   r   r   r   <module>r      sR        D D  * . |
, ~  ?NCKK/0; 0 KK'    Wx(F 	.m!9?? m!^	 !)
OX*1 1vL vLx 59	GG#s(OG !sCx1	G
 
Go  	NNU	s   .D DD