
import warnings
from collections import defaultdict
from typing import (
    Any,
    Callable,
    DefaultDict,
    Iterator,
    List,
    Optional,
    Sized,
    Type,
    TypeVar,
)

import torch.utils.data.datapipes.iter.sharding
from torch.utils.data.datapipes._decorator import functional_datapipe
from torch.utils.data.datapipes.datapipe import DataChunk, IterDataPipe
from torch.utils.data.datapipes.utils.common import _check_unpickable_fn

__all__ = [
    "BatcherIterDataPipe",
    "GrouperIterDataPipe",
    "UnBatcherIterDataPipe",
]

_T_co = TypeVar("_T_co", covariant=True)


def __getattr__(name: str):
    if name in ["SHARDING_PRIORITIES", "ShardingFilterIterDataPipe"]:
        warnings.warn(
            f"`{name}` from `torch.utils.data.datapipes.iter.grouping` is going to be removed in PyTorch 2.1"
            f"Please use `{name}` from the `torch.utils.data.datapipes.iter.sharding`",
            category=FutureWarning,
            stacklevel=2,
        )
        return getattr(torch.utils.data.datapipes.iter.sharding, name)
    raise AttributeError(f"module {__name__} has no attribute {name}")


@functional_datapipe("batch")
class BatcherIterDataPipe(IterDataPipe[DataChunk]):
    r"""
    Creates mini-batches of data (functional name: ``batch``).

    An outer dimension will be added as ``batch_size`` if ``drop_last`` is set to ``True``, or ``length % batch_size`` for the
    last batch if ``drop_last`` is set to ``False``.

    Args:
        datapipe: Iterable DataPipe being batched
        batch_size: The size of each batch
        drop_last: Option to drop the last batch if it's not full
        wrapper_class: wrapper to apply onto each batch (type ``List``) before yielding,
            defaults to ``DataChunk``

    Example:
        >>> # xdoctest: +SKIP
        >>> from torchdata.datapipes.iter import IterableWrapper
        >>> dp = IterableWrapper(range(10))
        >>> dp = dp.batch(batch_size=3, drop_last=True)
        >>> list(dp)
        [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    """

    datapipe: IterDataPipe
    batch_size: int
    drop_last: bool

    def __init__(
        self,
        datapipe: IterDataPipe,
        batch_size: int,
        drop_last: bool = False,
        wrapper_class: Type[DataChunk] = DataChunk,
    ) -> None:
        assert batch_size > 0, "Batch size is required to be larger than 0!"
        super().__init__()
        self.datapipe = datapipe
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.wrapper_class = wrapper_class

    def __iter__(self) -> Iterator[DataChunk]:
        batch: List = []
        for x in self.datapipe:
            batch.append(x)
            if len(batch) == self.batch_size:
                yield self.wrapper_class(batch)
                batch = []
        if len(batch) > 0:
            if not self.drop_last:
                yield self.wrapper_class(batch)

    def __len__(self) -> int:
        if isinstance(self.datapipe, Sized):
            if self.drop_last:
                return len(self.datapipe) // self.batch_size
            return (len(self.datapipe) + self.batch_size - 1) // self.batch_size
        raise TypeError(f"{type(self).__name__} instance doesn't have valid length")


@functional_datapipe("unbatch")
class UnBatcherIterDataPipe(IterDataPipe):
    r"""
    Undos batching of data (functional name: ``unbatch``).

    In other words, it flattens the data up to the specified level within a batched DataPipe.

    Args:
        datapipe: Iterable DataPipe being un-batched
        unbatch_level: Defaults to ``1`` (only flattening the top level). If set to ``2``,
            it will flatten the top two levels, and ``-1`` will flatten the entire DataPipe.

    Example:
        >>> # xdoctest: +SKIP
        >>> from torchdata.datapipes.iter import IterableWrapper
        >>> source_dp = IterableWrapper([[[0, 1], [2]], [[3, 4], [5]], [[6]]])
        >>> dp1 = source_dp.unbatch()
        >>> list(dp1)
        [[0, 1], [2], [3, 4], [5], [6]]
        >>> dp2 = source_dp.unbatch(unbatch_level=2)
        >>> list(dp2)
        [0, 1, 2, 3, 4, 5, 6]
    """

    def __init__(self, datapipe: IterDataPipe, unbatch_level: int = 1):
        self.datapipe = datapipe
        self.unbatch_level = unbatch_level

    def __iter__(self):
        for element in self.datapipe:
            yield from self._dive(element, unbatch_level=self.unbatch_level)

    def _dive(self, element, unbatch_level):
        if unbatch_level < -1:
            raise ValueError("unbatch_level must be -1 or >= 0")
        if unbatch_level == -1:
            if isinstance(element, (list, DataChunk)):
                for item in element:
                    yield from self._dive(item, unbatch_level=-1)
            else:
                yield element
        elif unbatch_level == 0:
            yield element
        else:
            if isinstance(element, (list, DataChunk)):
                for item in element:
                    yield from self._dive(item, unbatch_level=unbatch_level - 1)
            else:
                raise IndexError(
                    f"unbatch_level {self.unbatch_level} exceeds the depth of the DataPipe"
                )


@functional_datapipe("groupby")
class GrouperIterDataPipe(IterDataPipe[DataChunk]):
    r"""
    Groups data from IterDataPipe by keys from ``group_key_fn``, yielding a ``DataChunk`` with batch size up to ``group_size``.

    (functional name: ``groupby``).

    The samples are read sequentially from the source ``datapipe``, and a batch of samples belonging to the same group
    will be yielded as soon as the size of the batch reaches ``group_size``. When the buffer is full,
    the DataPipe will yield the largest batch with the same key, provided that its size is larger
    than ``guaranteed_group_size``. If its size is smaller, it will be dropped if ``drop_remaining=True``.

    After iterating through the entirety of source ``datapipe``, everything not dropped due to the buffer capacity
    will be yielded from the buffer, even if the group sizes are smaller than ``guaranteed_group_size``.

    Args:
        datapipe: Iterable datapipe to be grouped
        group_key_fn: Function used to generate group key from the data of the source datapipe
        keep_key: Option to yield the matching key along with the items in a tuple,
            resulting in `(key, [items])` otherwise returning [items]
        buffer_size: The size of buffer for ungrouped data
        group_size: The max size of each group, a batch is yielded as soon as it reaches this size
        guaranteed_group_size: The guaranteed minimum group size to be yielded in case the buffer is full
        drop_remaining: Specifies if the group smaller than ``guaranteed_group_size`` will be dropped from buffer
            when the buffer is full

    Example:
        >>> import os
        >>> # xdoctest: +SKIP
        >>> from torchdata.datapipes.iter import IterableWrapper
        >>> def group_fn(file):
        ...     return os.path.basename(file).split(".")[0]
        >>> source_dp = IterableWrapper(["a.png", "b.png", "a.json", "b.json", "a.jpg", "c.json"])
        >>> dp0 = source_dp.groupby(group_key_fn=group_fn)
        >>> list(dp0)
        [['a.png', 'a.json', 'a.jpg'], ['b.png', 'b.json'], ['c.json']]
        >>> # A group is yielded as soon as its size equals to `group_size`
        >>> dp1 = source_dp.groupby(group_key_fn=group_fn, group_size=2)
        >>> list(dp1)
        [['a.png', 'a.json'], ['b.png', 'b.json'], ['a.jpg'], ['c.json']]
        >>> # Scenario where `buffer` is full, and group 'a' needs to be yielded since its size > `guaranteed_group_size`
        >>> dp2 = source_dp.groupby(group_key_fn=group_fn, buffer_size=3, group_size=3, guaranteed_group_size=2)
        >>> list(dp2)
        [['a.png', 'a.json'], ['b.png', 'b.json'], ['a.jpg'], ['c.json']]
    Fi'  N)keep_keybuffer_size
group_sizeguaranteed_group_sizedrop_remainingr/   group_key_fnr_   r`   ra   rb   rc   c                4   t        |       || _        || _        || _        || _        t        t              | _        d| _        || _	        d | _
        ||d|cxk  r|k  sJ  J || _
        ||d|cxk  r|k  sJ  J || _
        || _        t        | _        y r:   )r   r/   rd   r_   max_buffer_sizer   rY   buffer_elementscurr_buffer_sizera   rb   rc   r   r2   )r7   r/   rd   r_   r`   ra   rb   rc   s           r*   r6   zGrouperIterDataPipe.__init__   s     	\* ( *7B47H !$%)"!k&=z0[00000)3D& ,)a2G.U:.UUU.UUU)>D&,&r,   c                    d }d}d }| j                   j                         D ]8  }t        | j                   |         |kD  st        | j                   |         }|}: | j                  =|| j                  k  r.| j                  s"t        dt        | j                   |               | j                  || j                  k\  r| j                   |   }| xj                  |z  c_        | j                   |= |S )Nr   zFailed to group items)rg   keysr<   rb   rc   RuntimeErrorstrrh   )r7   biggest_keybiggest_sizeresult_to_yieldfindkeys        r*   _remove_biggest_keyz'GrouperIterDataPipe._remove_biggest_key   s    ++002 	&G4''01L@"4#7#7#@A%	& &&2t999'''T-A-A+-N)O 
 &&.t999"22;?O-  -r,   c              #     K   | j                   D ]5  }| j                  |      }| j                  |   j                  |       | xj                  dz  c_        | j
                  | j
                  t        | j                  |         k(  rj| j                  | j                  |         }| j                  r||fn| | xj                  t        | j                  |         z  c_        | j                  |= | j                  | j                  k(  s| j                         }|| j                  |      }| j                  r||fn| 8 t        | j                  j                               D ]^  }| j                  | j                  j                  |            }| xj                  t        |      z  c_        | j                  r||fn| ` y w)Nr@   )r/   rd   rg   r;   rh   ra   r<   r2   r_   rf   rq   tuplerj   pop)r7   r=   keyresultro   s        r*   r>   zGrouperIterDataPipe.__iter__  s     	EA##A&C  %,,Q/!!Q&!*t#$$S)C 0 *.););D<P<PQT<U)V'+}}sFm&@%%T-A-A#-F)GG%((-$$(<(<<"&":":"<".!//@F+/==3-fD%	E( --2245 	=C''(<(<(@(@(EFF!!S[0!#'==3-f<	=s   D	GG B-Gc                 :    d| _         t        t              | _        y r:   )rh   r   rY   rg   rD   s    r*   resetzGrouperIterDataPipe.reset%  s     !*40r,   c           
      0   | j                   | j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  f
}t        j                  t        j                  |      S |S rQ   )r/   rd   r_   rf   ra   rb   rc   r2   _valid_iterator_id_number_of_samples_yieldedr   getstate_hookr7   states     r*   __getstate__z GrouperIterDataPipe.__getstate__)  s    MMMM  OO&&##++
 %%1--e44r,   c                     |\
  | _         | _        | _        | _        | _        | _        | _        | _        | _        | _	        d| _
        t        t              | _        y r:   )r/   rd   r_   rf   ra   rb   rc   r2   rz   r{   rh   r   rY   rg   r}   s     r*   __setstate__z GrouperIterDataPipe.__setstate__:  sZ     	
MM O&#+ !*40r,   c                 8    | j                   j                          y rQ   )rg   clearrD   s    r*   __del__zGrouperIterDataPipe.__del__J  s    ""$r,   )r3   N)r)   rF   rG   rH   r   r   r   r   rK   rJ   r	   r6   rq   r>   rx   r   r   r   r\   r,   r*   r   r      s    *b  $(/3$'u%' w|,'
 ' ' SM'  (}' '<:=41"1 %r,   r   )r   collectionsr   typingr   r   r   r   r   r	   r
   r   r   (torch.utils.data.datapipes.iter.shardingr"   %torch.utils.data.datapipes._decoratorr   #torch.utils.data.datapipes.datapiper   r   'torch.utils.data.datapipes.utils.commonr   __all__r   rl   r+   r   r   r   r\   r,   r*   <module>r      s     #
 
 
 0 E G H 	4(Gc G W;Y,y1 ;Y ;Y| Y1L 1  1h Yh%,y1 h%  h%r,   