
    bi/                       d dl mZ d dlZd dlmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlZd dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZmZ d dlmZmZ d dlmZ d dlm Z  dddZ!	 	 	 	 ddZ"	 	 	 	 	 	 	 	 	 	 ddZ#d Z$d Z%d Z&d Z'd Z(y)    )annotationsN)reduce)countproduct)mul)Literal)config)DataNodeListTaskTaskRefgetitem)Arrayunknown_chunk_message)concatenate_lookuptake_lookup)tokenize)HighLevelGraphc           	     ^   t        j                  | j                        j                         rt	        dt
               t        |t              sJ d       t        | j                  ||       t        | t        t        t        |            ||      } t        | ||      }d| }t        | j                  ||| j                   ||      \  }}t        |      dk(  r-t#        | j$                  | j                   | j                  |       S t'        j(                  ||| g      }t#        ||||       S )a  
    Reorders one dimensions of a Dask Array based on an indexer.

    The indexer defines a list of positional groups that will end up in the same chunk
    together. A single group is in at most one chunk on this dimension, but a chunk
    might contain multiple groups to avoid fragmentation of the array.

    The algorithm tries to balance the chunksizes as much as possible to ideally keep the
    number of chunks consistent or at least manageable.

    Parameters
    ----------
    x: dask array
        Array to be shuffled.
    indexer:  list[list[int]]
        The indexer that determines which elements along the dimension will end up in the
        same chunk. Multiple groups can be in the same chunk to avoid fragmentation, but
        each group will end up in exactly one chunk.
    axis: int
        The axis to shuffle along.
    chunks: "auto"
        Hint on how to rechunk if single groups are becoming too large. The default is
        to split chunks along the other dimensions evenly to keep the chunksize
        consistent. The rechunking is done in a way that ensures that non all-to-all
        network communication is necessary, chunks are only split and not combined with
        other chunks.

    Examples
    --------
    >>> import dask.array as da
    >>> import numpy as np
    >>> arr = np.array([[1, 2, 3, 4, 5, 6, 7, 8], [9, 10, 11, 12, 13, 14, 15, 16]])
    >>> x = da.from_array(arr, chunks=(2, 4))

    Separate the elements in different groups.

    >>> y = x.shuffle([[6, 5, 2], [4, 1], [3, 0, 7]], axis=1)

    The shuffle algorihthm will combine the first 2 groups into a single chunk to keep
    the number of chunks small.

    The tolerance of increasing the chunk size is controlled by the configuration
    "array.chunk-size-tolerance". The default value is 1.25.

    >>> y.chunks
    ((2,), (5, 3))

    The array was reordered along axis 1 according to the positional indexer that was given.

    >>> y.compute()
    array([[ 7,  6,  3,  5,  2,  4,  1,  8],
           [15, 14, 11, 13, 10, 12,  9, 16]])
    z/Shuffling only allowed with known chunk sizes. zaxis must be an integerzshuffle-r   )meta)dependencies)npisnanshapeany
ValueErrorr   
isinstanceint_validate_indexerchunks_rechunk_other_dimensionsmaxmaplenr   _shufflenamer   daskr   from_collections)xindexeraxisr!   tokenout_namelayergraphs           N/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/array/_shuffle.pyshuffler2      s   l 
xx=>S=TU
 	
 dC ;";; ahh.!!SS'):%;T6JAQ&E%!HQXXwaffhNMFE
5zQQVVQVVQXXA66++He1#NE&q11    c                   t        j                  d      }t        |d      }|r*t        |      }t	        t
        t        t        |            |z  }|dk  r	 |S t        |      D ]  }g }t        ||         |d|z  z  z  }	| |   D ]  }
|
||	z  kD  rht        j                  |
|	z        }t        ||
      }t        |
|      \  }}|g|z  }t        |      D ]  }||xx   dz  cc<    |j                  |       s|j                  |
        t        |      ||   k(  st        |      dk(  r|j!                  |       t        |      ||<    |r*|S )Narray.chunk-size-tolerance   )r	   getr#   r%   r   r   r$   listmathceilmindivmodrangeextendappendtupleremove)input_chunks
new_chunkschangeable_dimensionsmaximum_chunkchunksize_tolerancen_changeable_dimensionschunksize_inc_factorinew_chunksizesup_chunksize_limit_for_dimcfactor	chunksize	remaindernciis                   r1   _calculate_new_chunksizesrR   _   s    !**%ABq)M  "%&;"<%c3sJ+?@=P1$> ; +, 	2AN *-Z]);$-D)DE*& "!_ -*-GGG!YYq+E'EFF !^F+1!V+<(Iy#v-B#I. $2!$ #))"- #))!,-" ^$
15^9LPQ9Q%,,Q/!.1JqM9	2  F r3   c                   |dk(  sJ d       t        j                  d      }|t        | j                  |         |z  k  r| S t	        t        t        | j                                    |hz
  }t        | j                        }|f||<   t        t        t        t        | j                              }t        | j                  |||      }| j                  |   ||<   | j                  t        |            S )NautozOnly auto is supported for nowr5   )r	   r7   r#   r!   setr=   r%   r8   r   r   r$   rR   rechunkr@   )r*   longest_groupr,   r!   rF   rD   rC   rE   s           r1   r"   r"      s     V=== **%ABAHHTN+.AAAc!((m 45>ahhJ%'Jt 3C 23M*	*3]J xx~Jt99U:&''r3   c                6   t        |t              rt        d |D              st        d      |t	        |       k  st        d| dt	        |        d      t        t        t
        |            t        | |         k\  rt        dt        | |          d      y )Nc              3  <   K   | ]  }t        |t                y wN)r   r8   ).0rI   s     r1   	<genexpr>z$_validate_indexer.<locals>.<genexpr>   s     /U
1d0C/Us   z5indexer must be a list of lists of positional indiceszAxis z! is out of bounds for array with z axesz9Indexer contains out of bounds index. Dimension only has z
 elements.)	r   r8   allr   r%   r#   r$   sum
IndexError)r!   r+   r,   s      r1   r    r       s    gt$C/UW/U,UPQQ3v;D6:3v;-uM
 	
 3sGVD\!22GFSWLHYGZZde
 	
 3r3   c                J
   t        | |       t        |      t        |          k(  r?d}t        ||          D ]'  \  }}|t        t	        |||z               k7  r n||z  }) | i fS t        j                  d      }	t        t        |          t        |          z  |	z        }
g g }}|D ]  }t        |      t        |      z   |
kD  r0t        |      dkD  r"|j                  |       |j                         }M|j                  |       t        |      |
|	z  kD  sp|j                  |       g } t        |      dkD  r|j                  |       t        j                  t        j                  |    d            }t        t        fdt!        |       D               }t#               }t#               }t        j$                  t'        g |    |
       }d| }t)        d       gt        |       z  }t+               }d}d}t        j,                  t/        | D cg c]  }t        |       c}            D ci c]	  }||f|z    }}t!        |      D ]\  \  }}t        j                  |      }t        j0                  |      j3                  |      }d }||   }t        j4                  t        j6                  ||d	
      d      \  }}|j9                         }|j                  t        |             i } |D ]  }!g }"t        ||d d |dd        D ]  \  }}#}$t;        |!|      }%|t=        |      f}&|j                         }'|| v r| |   }(n||#|$ |dkD  r||dz
     ndz
  j3                  |      |'<   t        |      dk(  r|'   t        j0                  |         |'<   |t?        |'      z   }(tA        |(dt/        |'      f      ||(<   |(| |<   tC        |&tD        tG        ||%         tG        |(            ||&<   |"j                  |&        t;        |!|      })|f|)z   }*t        |"      dkD  rO||t?        |      z   }tA        |d|f      ||<   tC        |*tH        tK        d |"D         tG        |            ||*<   t        |"      dk(  r"|jM                  |"d         }+|*|+_'        |+||*<   tP         _ g },t!        |       D ]D  \  }-}|-k(  r)|,j                  t/        tS        t        |                   4|,j                  |       F i ||}.t/        |,      |.fS c c}w c c}w )Nr   r5   uint64)dtypec              3  X   K   | ]!  \  }}|k7  st        t        |             # y wrZ   )r=   r%   )r[   rI   rL   r,   s      r1   r\   z_shuffle.<locals>.<genexpr>   s#     KDAqd%A-Ks   **zshuffle-split-zshuffle-sorter-zshuffle-taker-right)sideT)return_indexr6   c              3  2   K   | ]  }t        |        y wrZ   )r   )r[   ms     r1   r\   z_shuffle.<locals>.<genexpr>#  s     :!71::s   )*r    r%   zipr8   r=   r	   r7   r   r^   r?   copyr>   r   cumsumarrayr   	enumeratedictmin_scalar_typer#   slicer   ndindexr@   argsortastypeuniquesearchsortedtolistconvert_keynextr   r
   r   _getitemr   concatenate_arraysr   popkeyNotImplementedErrorr$   )/r!   r+   r,   in_namer.   r-   ctridxrL   rF   chunk_size_limitcurrent_chunkrC   chunk_boundarieschunk_tuplesintermediatesmergesrb   
split_nameslicessplit_name_suffixessorter_name
taker_name	old_index
old_blocksnew_chunk_idxnew_chunk_takersorter
sorter_keysorted_arraysource_chunk_nrtaker_boundarytaker_cachechunk_tuple
merge_keysb_startb_end	chunk_keyr'   
this_slice	taker_keymerge_suffixout_name_mergetoutput_chunksrI   r/   s/     `                                            r1   r&   r&      ss   fgt,
7|s6$<(('6$<0 	FCd5cAg.//1HC	
 2: **%AB3vd|,s6$</@@CVVW !#B:M #}C(+;;M@RUV@Vm,HHJM  %=!$47J$JJ!!-0 "# =A-( yy&,h!GH KIf,=KLL FMVFsCF4LC2BCDE!%)JDk]S[(F'#K!J E6*Ba3q6*B$CD 	G:	))J  +4J*? D*&((?3O,33E:
&v.*,))OO,lI+
' (..0c/23' 6	*KJ%(!4nQR6H& (!7E (Q=	"D)<$=>#[[]
 # +AI %WU367!e+AE2DfUm t$ ?+q0+5d+;BJJv<N+O
4( *Xj-A AI/7!AuZ'8#90M), &/KN&*(GJy,A$BGIDV'd# !!$'=(@ '{M4HL&[<7N:"%!,x/?!?J)1*q&k)JF:&)-"&:z:;J'*~& ZA%!%%jm4&)*~&))m6	*D*L M&! $19  s3
';!<=  #	$ (v''E&&a +Cs   T9T c                     t        | |d         S )Nr6   r   )objindexs     r1   rz   rz   9  s    3a!!r3   c           	         t         t        j                  t        | d               | |      t	        j
                  |d         |      S )Nr   )r,   r6   )r   r   dispatchtyper   rs   )arrsr   r,   s      r1   r{   r{   =  sB    2##DaM24dC


6!9 r3   c                R    t        |       } | j                  ||       t        |       S rZ   )r8   insertr@   )r}   chunkr,   s      r1   rx   rx   E  s#    
s)CJJtU:r3   )rT   )r+   zlist[list[int]]r,   r   r!   Literal['auto'])rD   rU   rE   r   )
r*   r   rW   r   r,   r   r!   r   returnr   ))
__future__r   r9   	functoolsr   	itertoolsr   r   operatorr   typingr   numpyr   r(   r	   dask._task_specr
   r   r   r   dask.array.chunkr   dask.array.corer   r   dask.array.dispatchr   r   	dask.baser   dask.highlevelgraphr   r2   rR   r"   r    r&   rz   r{   rx    r3   r1   <module>r      s    "   $     9 9 $ 8 ?  .H2V,58,IL,^(( ((+(5D(
(0
A'H"r3   