
    biV                    
   d dl mZ d dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d dlmZmZmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZmZmZmZmZmZmZ d d	l m!Z! d d
l"m#Z#m$Z$ d dl%m&Z&m'Z' d dl(m)Z) d dl*m+Z+m,Z, d dl-m.Z. d dl/m0Z0m1Z1  G d de      Z2 G d de2      Z3 G d dee2      Z4 G d de4      Z5 G d de5      Z6 G d dee4      Z7 G d de7      Z8 G d dee4      Z9 G d  d!e9      Z: G d" d#e2      Z; G d$ d%ee4      Z<y)&    )annotationsN)delayed)AliasDataNodeListTask)methods)to_pyarrow_string)apply_and_enforceis_dataframe_like)	BlockwiseExprLengthsLiteralPartitionsFiltered
Projectiondetermine_column_projection
no_default)Len)RepartitionRepartitionToFewer)_BackendData_convert_to_list)	make_meta)_meta_from_arraysorted_division_locations)Key)funcnameis_series_likec                      e Zd Zd Zy)IOc                R    t        |       j                   d| j                  dd   dS )N(i))type__name___nameselfs    Y/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/dask_expr/io/io.py__str__z
IO.__str__#   s+    t*%%&a

23'8::    N)r&   
__module____qualname__r+    r,   r*   r!   r!   "   s    ;r,   r!   c                  X    e Zd ZdZg dZed        Zd Zej                  d        Z
d Zy)	FromGraphzA DataFrame created from an opaque Dask task graph

    This is used in persist, for example, and would also be used in any
    conversion from legacy dataframes.
    )layer_meta	divisionskeysname_prefixc                $    | j                  d      S )Nr3   operandr(   s    r*   r3   zFromGraph._meta0   s    ||G$$r,   c                $    | j                  d      S )Nr4   r8   r(   s    r*   
_divisionszFromGraph._divisions4   s    ||K((r,   c                D    | j                  d      dz   | j                  z   S )Nr6   -)r9   deterministic_tokenr(   s    r*   r'   zFromGraph._name7   s!    ||M*S043K3KKKr,   c                    t        | j                  d            }t        | j                         | j                  d            D ]  \  }}t	        ||      ||<    |S )Nr2   r5   )dictr9   zip__dask_keys__r   )r)   dsknewolds       r*   _layerzFromGraph._layer;   sW    4<<() D..0$,,v2FG 	'HCS#CH	'
r,   N)r&   r-   r.   __doc___parameterspropertyr3   r;   	functoolscached_propertyr'   rF   r/   r,   r*   r1   r1   '   sG     IK% %) L Lr,   r1   c                  @    e Zd ZdZej
                  d        Zd Zd Zy)BlockwiseIOFc                     yN   r/   r(   s    r*   _fusion_compression_factorz&BlockwiseIO._fusion_compression_factorG   s    r,   c                   | j                   rt        |t              rt        | j                        r|j                  d      }t        | ||      }t        |      }| j                  D cg c]	  }||v s| }}t        |      t        | j                        k(  ry d|i}| j                  |      }|j                  |k7  r||   }|S y y y c c}w Ncolumns)_absorb_projections
isinstancer   r   r3   r9   r   r   rT   setsubstitute_parameters)r)   parent
dependentsparent_columnsproposed_columnscolsubstitutionsresults           r*   _simplify_upzBlockwiseIO._simplify_upK   s    $$6:.!$**- $^^I6N:4T/0@A/3||WsFV?VWW#$DLL(99&(89M//>F~~//M . / %  Xs   )	C	3C	c                H   | j                   dk\  ry t        |t              ry t        |       }t        |t              rU|j                  }|j                         D ]6  }|| u r|j                  |      r|j                  |t        ||            }8 |j                  | |      S )NrP   )new_partitions)	rQ   rV   FusedIOr   npartitionsdependencies_broadcast_dep
substituter   )r)   rY   fusednpartsargs        r*   _tune_upzBlockwiseIO._tune_up_   s    **a/fg&fi(&&F**, $;,,S1#..[VDF	   u--r,   N)	r&   r-   r.   rU   rJ   rK   rQ   r`   rk   r/   r,   r*   rM   rM   D   s*     (.r,   rM   c                      e Zd ZdgZej
                  d        Zej
                  d        Zd Zej
                  d        Z	d Z
ddZed        Zd	 Zy
)rc   _exprc                X    | j                  d      j                  dz   | j                  z   S )Nrm   z-fused-)r9   	_funcnamer>   r(   s    r*   r'   zFusedIO._namet   s'    ||G$..:T=U=UUUr,   c                8    | j                  d      j                  S )Nrm   )r9   r3   r(   s    r*   r3   zFusedIO._metax   s    ||G$***r,   c                    g S Nr/   r(   s    r*   re   zFusedIO.dependencies|   s    	r,   c                ,    t        | j                        S rr   )len_fusion_bucketsr(   s    r*   rd   zFusedIO.npartitions   s    4''((r,   c                   | j                  d      j                         }| j                  D cg c]
  }||d       }}|d   |j                  d        t	        |      S |j                  |d          t	        |      S c c}w )Nrm   r   )r9   r;   ru   appendtuple)r)   r4   bnew_divisionss       r*   r;   zFusedIO._divisions   s    LL)446	262F2FGQ1Q4GG#  & ]##   2/]## Hs   A?c                    | j                  d      | j                  |   }t        t        j                  t        fd|D         d      S )Nrm   c              3  B   K   | ]  }j                  |        y wrr   )_filtered_task).0iexprnames     r*   	<genexpr>z FusedIO._task.<locals>.<genexpr>   s     @A4&&tQ/@s   T_data_producer)r9   ru   r   r	   concatr   )r)   r   indexbucketr   s    `  @r*   _taskzFusedIO._task   sI    ||G$%%e,NN@@A	
 	
r,   c                   | j                  d      j                  }t        |      }t        j                  d| j                  d      j
                  z        }t        |t        j                  t        j                  |            d      }t        j                  ||z        }t        j                  ||      }t        ||dd        D cg c]
  \  }}|||  }}}|S c c}}w )Nrm   rP   d   )r9   _partitionsrt   mathceilrQ   minsqrtr   _compute_partition_boundariesrA   )	r)   
partitionsrd   bucket_sizenew_npartitionspartition_boundariesstartendbucketss	            r*   ru   zFusedIO._fusion_buckets   s    \\'*66
*oiiDLL$9$T$T TU+tyy;1G'H#N))K+$=>  2OO[ 
 !$$8:Nqr:R S
 uS!
 
 	
s   Cc                     y rr   r/   )r)   rY   s     r*   rk   zFusedIO._tune_up   s    r,   Nr   r   r   intreturnr   )r&   r-   r.   rH   rJ   rK   r'   r3   re   rd   r;   r   rI   ru   rk   r/   r,   r*   rc   rc   q   s    )KV V + + ) )$	
  (r,   rc   c                  N    e Zd ZdgZej
                  d        Zed        ZddZ	y)FusedParquetIOrm   c                    t        t        | j                  d                  j                         dz   | j                  z   S )Nrm   z-fused-parq-)r   r%   r9   lowerr>   r(   s    r*   r'   zFusedParquetIO._name   s>     T$,,w/01779&&'	
r,   c                    ddl m fd| D        }t        j                  |d      } j                  |fi |S )Nr   )ReadParquetPyarrowFSc              3  L   K   | ]  \  }}j                  ||        y wrr   )_fragment_to_table)r   fragfilterr   rT   schemas      r*   r   z6FusedParquetIO._load_multiple_files.<locals>.<genexpr>   s6      
 f !33	
s   !$
permissive)promote_options)#dask.dataframe.dask_expr.io.parquetr   paconcat_tables_table_to_pandas)frag_filtersrT   r   to_pandas_kwargstablestabler   s    ``   @r*   _load_multiple_filesz#FusedParquetIO._load_multiple_files   sH     	M
 !-
   F4#44UO>NOOr,   c                   | j                  d      }| j                  |   }g }|sJ |D ]  }|j                  ||      }|j                  }t	        |j
                        dk(  sJ |j
                  d   }	|j                  |	j                  d   |	j                  d   f       |	j                  d   }
|	j                  d   } t        || j                  |
fi dd	iS )
Nrm   rP   r   fragment_wrapperfiltersrT   r   r   T)	r9   ru   r~   kwargsrt   argsrx   r   r   )r)   r   r   r   r   fragments_filtersr   subtaskr   frag_to_table_taskrT   r   s               r*   r   zFusedParquetIO._task   s   ||G$%%e,v 	9A))$2G&~~w||$)))!(a$$&--.@A&--i8 )//	:G'..x8F	9 %%
 
  
 	
r,   N)r   strr   r   r   r   )
r&   r-   r.   rH   rJ   rK   r'   staticmethodr   r   r/   r,   r*   r   r      s<    )K
 
 P P(
r,   r   c                      e Zd Zg dZedddddZdZej                  d        Z	ej                  d        Z
d Zed        Zej                  d	        Zdd
Zy)FromMap)	func	iterablesr   r   	user_metaenforce_metadatauser_divisionslabelr   FN)r   r   r   r   r   c                    | j                   3t        | j                        j                         dz   | j                  z   S | j                   dz   | j                  z   S )Nr=   )r   r   r   r   r>   r(   s    r*   r'   zFromMap._name  sN    ::DII&,,.4t7O7OOO::#d&>&>>>r,   c                X   | j                  d      t        ur| j                  d      }t        |      S | j                  D cg c]  }|d   	 }} t	        | j
                        g || j                  i | j                  } t	        t              |      j                         S c c}w )Nr   r   )	r9   r   r   r   r   r   r   r   compute)r)   metavvalss       r*   r3   zFromMap._meta
  s    <<$J6<<,DT?""&..1QAaD1D1%7499%GtGdiiG4;;GD%79%d+3355 2s   B'c                    | j                  d      r| j                  d      S t        | j                  d         }d|dz   z  S )Nr   r   rr   rP   )r9   rt   r   )r)   rd   s     r*   r;   zFromMap._divisions  sA    <<()<< 011dnnQ/0KkAo..r,   c                >    | j                   rt        S | j                  S rr   )r   r   r   r(   s    r*   
apply_funczFromMap.apply_func  s      $$yyr,   c                    | j                   }| j                  r8|j                         }|j                  | j                  | j
                  d       |S N)_funcr3   )r   r   copyupdater   r3   r)   r   s     r*   apply_kwargszFromMap.apply_kwargs!  sE      [[]FMM!YY!ZZ r,   c                    | j                   D cg c]  }||   	 }}| j                  r.t        |t        g|| j                  i | j
                  ddiS t        || j                  g|| j                  i | j
                  ddiS c c}w )Nr   T)r   r   r   r   r   r   r   )r)   r   r   r   r   s        r*   r~   zFromMap._filtered_task.  s    "&..1Q%11  !  	
 ##  $  $))
"
%)YY
262C2C
TX
 	
 2s   Br   )r&   r-   r.   rH   r   	_defaultsrU   rJ   rK   r'   r3   r;   rI   r   r   r~   r/   r,   r*   r   r      s    
K  !I  ? ? 6 6/  
 
 

r,   r   c                      e Zd Zg dZeddddddZdZej                  d        Z	e
d        Zej                  d        Zej                  d	        Zej                  d
        Zej                   fd       Ze
d        Zd fdZ xZS )FromMapProjectable)r   r   rT   r   r   columns_arg_requiredr   r   r   r   r   _seriesFN)r   r   r   r   r   r   Tc                6    t        | j                  d            S rS   )r   r9   r(   s    r*   columns_operandz"FromMapProjectable.columns_operandW  s    Y 788r,   c                p    | j                   t        | j                  j                        S | j                   S rr   )r   list
frame_metarT   r(   s    r*   rT   zFromMapProjectable.columns[  s/    '//00'''r,   c                Z    | j                  d      xr | j                  j                  dkD  S )Nr   rP   )r9   r   ndimr(   s    r*   r   zFromMapProjectable._seriesb  s'     ||I&C4??+?+?!+CCr,   c                    | j                  d      }| j                  s| j                  r|j                         }| j                  |d<   |S )Nr   rT   )r9   r   r   r   rT   )r)   optionss     r*   r   zFromMapProjectable.kwargsh  s?    ,,x($$(<(<llnG!%GIr,   c                    | j                   }| j                  r8|j                         }|j                  | j                  | j
                  d       |S r   )r   r   r   r   r   r   r   s     r*   r   zFromMapProjectable.apply_kwargsp  sE      [[]FMM!YY!__ r,   c                p    t         |   }|j                  dkD  r| j                  || j                     S |S rO   )superr3   r   r   )r)   r   	__class__s     r*   r   zFromMapProjectable.frame_meta}  s:     w}99q=T11=,,--r,   c                    | j                   }| j                  r,t        | j                        dkD  sJ || j                  d      S |S Nr   )r   r   rt   r   r)   r   s     r*   r3   zFromMapProjectable._meta  sH     <<t++,q000,,Q/00r,   c                    t         |   ||      }| j                  r)t        |t        j
                  || j                  d         S |S r   )r   r~   r   r   operatorgetitemrT   )r)   r   r   tskr   s       r*   r~   z!FromMapProjectable._filtered_task  s?    g$T51<<h..T\\!_EE
r,   r   )r&   r-   r.   rH   r   r   rU   rJ   rK   r   rI   rT   r   r   r   r   r3   r~   __classcell__r   s   @r*   r   r   >  s    K  !I 9 9 ( ( D D
   
 
     r,   r   c            	      .    e Zd ZU dZg dZdddddddddZded<   dZej                  d	        Z
ej                  d
        Zej                  d        Zej                  d        ZddZ fdZd Zej                   fd       Zd ZddZd ZeZ xZS )
FromPandasz*The only way today to get a real dataframe)	framerd   sort	chunksizerT   pyarrow_strings_enabledr   r   _pd_length_statsNTF)rd   r   rT   r   r   r   r   r   tuple | Noner   c                    | j                  d      j                  }| j                  r1|j                  j                  s|j                         }t        |      S | j                  d      S )Nr   )r9   _datar   r   is_monotonic_increasing
sort_indexr   )r)   r   s     r*   r   zFromPandas.frame  sQ    W%++99U[[@@$$&E&&||G$$r,   c                ,   | j                   r.t        t        | j                  j	                  d                  }n| j                  j	                  d      }| j                  d      -| j                  r|| j                  d      S || j                     S |S )NrP   r   rT   )r   r   r
   r   headr9   r   rT   r   s     r*   r3   zFromPandas._meta  su    ''.tzzq/ABCD::??1%D<<	".,0LL4Q(Pd4<<>PPr,   c                    | j                  d      }| 	 t        | j                  j                        S t        |      S # t        $ r# | j
                  dk(  r| j                  gcY S g cY S w xY w)NrT   rP   )r9   r   r   rT   AttributeErrorr   r   r   )r)   r   s     r*   rT   zFromPandas.columns  sj    ,,y1"DJJ..// $O44 " 99> II;&	s   ? %A+&A+*A+c                   t        | j                  t              sJ | j                  d      }| j                  }||f}| j                  j
                  }||vr| j                  j                  }t        |      }|dk(  r|xs d}dg|dz   z  }dt        |      z  }n|s*| j                  j                  j                  j                  r+t        |j                  || j                  d            \  }}nf|| j                  d      }	n!t        t        j                  ||z              }	t        t        d||	            t        |      gz   }dt        |      z  }||f||<   ||   S )Nrd   r   rP   rr   r   )rd   r   )rV   r   r   r9   r   _division_infor   rt   r   r   r   r   r   r   r   range)
r)   rd   r   key_division_info_cachedatanrows	locationsr4   r   s
             r*   _divisions_and_locationsz#FromPandas._divisions_and_locations  sL   $**l333ll=1yyD!#zz88**::##DIEz).QC;?3	#c)n4	))//GG'@JJ +"ll;7($	9 & $[ 9I #DIIek.A$B CI q%!;<D	{J	#c)n4	(19(< %#C((r,   c                      j                   6 j                         t         fdt        dd        D               _          j                   S )Nc              3  h   K   | ])  \  }}j                   r|j                  v r
||   z
   + y wrr   )	_filteredr   )r   r   offsetr  r)   s      r*   r   z*FromPandas._get_lengths.<locals>.<genexpr>  s:      *Av~~d.>.>)> 1%*s   /2rP   )r   
_locationsry   	enumerate)r)   r  s   `@r*   _get_lengthszFromPandas._get_lengths  sN      ()I$) *!*9QR=!9* %D!
 $$$r,   c                   t        |t              r| j                         }|rt        |      S t        |t              r&| j                         }|rt        t        |            S t        |t              rt        | !  ||      S y rr   )	rV   r   r  r   r   sumr   r   r`   )r)   rY   rZ   _lengthsr   s       r*   r`   zFromPandas._simplify_up  st    fg&((*Hx((fc"((*Hs8}--fj)7'
;; *r,   c                     | j                   d   S r   r	  r(   s    r*   r;   zFromPandas._divisions      ,,Q//r,   c                f    | j                   r
t        |   S t        | j                  d         dz
  S Nr   rP   )r  r   rd   rt   r	  )r)   r   s    r*   rd   zFromPandas.npartitions  s0    >>7&&40034q88r,   c                     | j                   d   S rO   r  r(   s    r*   r  zFromPandas._locations  r  r,   c                D   | j                         ||dz    \  }}| j                  j                  || }| j                  rt	        |      }| j                  d      ;t        || j                  r|| j                  d            S || j                           S t        ||      S )N   rT   r   )	r  r   ilocr   r
   r9   r   r   rT   )r)   r   r   r   stopparts         r*   r~   zFromPandas._filtered_task  s    oo'	:tzzuT*''$T*D<<	".t||d4<<?+ AEdllAS  d##r,   c                    | j                   r@| j                  d      r/| j                  rd| j                  d    dS d| j                   dS y)NrT   zdf[r   ]df)rU   r9   r   rT   r(   s    r*   r+   zFromPandas.__str__  sK    ##Y(?||T\\!_-Q//a((r,   )r   r   )r   r   r   r   r   r   )r&   r-   r.   rG   rH   r   __annotations__rU   rJ   rK   r   r3   rT   r	  r  r`   r;   rd   r  r~   r+   __repr__r   r   s   @r*   r   r     s    4
K #' 	I #"% %   
5 
5 ) ):%<0 9 9
0	$ Hr,   r   c                  Z    e Zd Zg dZdddddZdZej                  d        Ze	d        Z
y)FromPandasDivisions)r   r4   rT   r   r   r   r   NF)rT   r   r   r   Tc                     d| j                   z   S )Nzfrom_pd_divs-)r>   r(   s    r*   r'   zFromPandasDivisions._name;  s    #d&>&>>>r,   c                   t        | j                  t              sJ t        | j	                  d            }| j                  j
                  }||vr| j                  j                  }|j                  j                  r|j                  j                  |d      }n,t        j                  |j                  j                  |d      }t        |      |d<   ||f||<   ||   S )Nr4   bfill)methodleft)siderw   )rV   r   r   ry   r9   r  r   r   	is_uniqueget_indexernpsearchsortedvaluesrt   )r)   r  r  r  indexers        r*   r	  z,FromPandasDivisions._divisions_and_locations?  s    $**l333DLL-.#zz88**::##Dzz##**00W0E //$***;*;SvNd)GBK(+W %#C((r,   )r&   r-   r.   rH   r   r   rJ   rK   r'   rI   r	  r/   r,   r*   r%  r%  )  sQ    K  	I D? ? ) )r,   r%  c                  \    e Zd ZddgZed        Zd Zej                  d        Z	d	dZ
d Zy)
FromScalarsr   namesc                "    | j                         S rr   )re   r(   s    r*   _scalarszFromScalars._scalarsS  s      ""r,   c                V    t        | j                        t        | j                        fS rr   )r   r4  maxr(   s    r*   r;   zFromScalars._divisionsW  s    DJJTZZ11r,   c                     t        | j                        | j                  D cg c]  }|j                   c}| j                  | j                  j
                        S c c}w )N)r   r   )r%   r   r6  r3   r4  r   r)   ss     r*   r3   zFromScalars._metaZ  sB    tDII"mm,QWW,DJJTYY^^
 	
,s   Ac                    | j                   dft        | j                        | j                  D cg c]  }|j                   df c}| j                  d | j                  j
                  fiS c c}w r   )r'   r%   r   r6  r4  r   r:  s     r*   rF   zFromScalars._layer`  s[    ZZOTYY'+}}5!!''15

		
 	
 6s   A+c                   t        |t              rt        |j                        t        | j                        k(  ry g g }}t        | j                  | j                        D ]6  \  }}||j                  v s|j                  |       |j                  |       8  t        |       t        |       | j                  |g| g|j                  dd   S y rO   )rV   r   sortedrT   r4  rA   r6  rx   r%   r   operands)r)   rY   rZ   	new_namesnew_scalarsnr;  s          r*   r`   zFromScalars._simplify_upk  s    fj)fnn%

);;%'{IDJJ6 *1&$$Q'&&q)*  4<T
499i>+>AGQRQSAT  *r,   N)r   r@   )r&   r-   r.   rH   rI   r6  r;   rJ   rK   r3   rF   r`   r/   r,   r*   r3  r3  P  sG    7#K# #2 
 

	
r,   r3  c                      e Zd Zg dZddddddZdZej                  d        Zej                  d        Z	ej                  d        Z
d	 Zej                  d
        ZddZy)	FromArray)r   r   original_columnsr   rT   r   iP  N)r   rE  r   rT   r   Tc                    t        | j                  | j                  d      | j                  d            }| j                  d      || j                  d         S |S )NrE  r   rT   )r   r   r9   r   s     r*   r3   zFromArray._meta  sT    JJ%78$,,v:N
 <<	".Y/00r,   c                    | j                  d      It        | j                        rdgS t        t	        t        | j                  j                                    S | j                  d      S )NrE  r   )r9   r   r3   r   r  rt   rT   r(   s    r*   rE  zFromArray.original_columns  sT    <<*+3djj)s
c$**"4"45677||.//r,   c                    | j                  d      t        dt        | j                              S t	        | j                        D cg c]  \  }}|| j                  d      v r| c}}S c c}}w )NrT   r   )r9   slicert   rE  r  )r)   r   r]   s      r*   _column_indiceszFromArray._column_indices  sk    <<	"*C 5 5677 $D$9$9:
3dll9-- 
 	
 
s   	A,c                    t        t        dt        | j                        | j                              }|t        | j                        dz
  fz   }|S r  )ry   r  rt   r   r   )r)   r4   s     r*   r;   zFromArray._divisions  sA    %3tzz?DNNCD	TZZ1!4 66	r,   c                "    | j                         S rr   )r;   r(   s    r*   unfiltered_divisionszFromArray.unfiltered_divisions  s      r,   c           	        | j                   t        || j                  z  |dz   | j                  z           }|t        | j                        dz
  k(  r-t        | j                  |   | j                  |dz      dz         }n)t        | j                  |   | j                  |dz            }t        | j                        rMt        |t        | j                        ||| j                  j                  | j                  j                  d      S |j                  dk(  r|d d | j                  f   }t        |t        | j                        ||| j                  j                  d      S )NrP   r  Tr   )r   rI  r   rt   rM  r  r   r3   r   r%   dtyper   r   rJ  rT   )r)   r   r   r  idxs        r*   r~   zFromArray._filtered_task  s?   zz% 6dnn8TUVC112Q66))%0))%!)4q8C
 ))%0$2K2KETUI2VC $**%TZZ 

  

#  yyA~At3334TZZ 

""# r,   r   )r&   r-   r.   rH   r   rU   rJ   rK   r3   rE  rJ  r;   rM  r~   r/   r,   r*   rD  rD  y  s    K  I   0 0 
 

 ! ! r,   rD  )=
__future__r   rJ   r   r   numpyr.  pyarrowr   daskr   dask._task_specr   r   r   r   dask.dataframer	   dask.dataframe._pyarrowr
   dask.dataframe.corer   r   dask.dataframe.dask_expr._exprr   r   r   r   r   r   r   r   $dask.dataframe.dask_expr._reductionsr   %dask.dataframe.dask_expr._repartitionr   r   dask.dataframe.dask_expr._utilr   r   dask.dataframe.dispatchr   dask.dataframe.io.ior   r   dask.typingr   
dask.utilsr   r   r!   r1   rM   rc   r   r   r   r   r%  r3  rD  r/   r,   r*   <module>ra     s    "       7 7 " 5 D	 	 	 5 Q I - L  /; ;
 :*.)R *.Z<k <~;
W ;
|M
 + M
`V VrO#[ Od$)* $)N&" &RV"K Vr,   