
    bi                    N    d dl mZ d dlZd dlmZ d dlmZmZ  G d d      Z	d Z
y)    )annotationsN)_get_pyarrow_dtypes_meta_from_dtypesc                  L    e Zd Zed        Zed        Zed        Zed        Zy)ArrowORCEnginec                   t        |      dk(  r(|j                  |d         s|j                  |d         }d }g }	d }
|rd}|D ]  }|j                  |d      5 }t	        j
                  |      }||j                  }n||j                  k7  rt        d      t        t        |j                              }|r|	j                  ||d| fg       ||j                  k  rA|	j                  ||||t        |      z    fg       |t        |      z  }||j                  k  rA|rt        |      dkD  r||j                  z  }nd}d d d         n\|D ]W  }|?|j                  |d   d      5 }t	        j
                  |      }|j                  }d d d        |	j                  |d fg       Y t        |d       }|4t        |      t        |      z
  }|rt        d| dt        |       d	      | j                  |||	      }	|t        |      n|}t!        |t"              r|gn|}t%        |||g       }|	||fS # 1 sw Y   xY w# 1 sw Y   xY w)
N   r   c                Z    || j                   }|S || j                   k7  rt        d      |S )N,Incompatible schemas while parsing ORC files)schema
ValueError)_or   s     V/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/io/orc/arrow.py_get_schemaz1ArrowORCEngine.read_metadata.<locals>._get_schema   s6    ~ M 299$ !OPPM    rbr   )
categorieszRequested columns (z) not in schema ())lenisfilefindopenorcORCFiler   r   listrangenstripesappendintr   set_aggregate_files
isinstancestrr   )clsfspathscolumnsindexsplit_stripesaggregate_fileskwargsr   partsr   offsetpathfo_stripesexmetas                     r   read_metadatazArrowORCEngine.read_metadata
   sh    u:?299U1X#6GGE!H%E	 F #WWT4( #AAA~!"188+()WXX#E!**$56HtXa-?&@%AB 1::-"HVfs=?Q6Q$RST #m"44	 !1::-
 '3}+=+A!**,!"## ##(  ->q40 *AKKN!"* tTl^,- %V=WF+B )"->s6{m1M 
 $$_mUK")/$v,w%eS1u &%<fd""Q# #** *s   CH8!"H8+"I8I	I	c                @   |du rt        |      dkD  rt        |      dkD  r}g }|d   }t        |d   d         }|dd  D ]J  }t        |d   d         }||z   |k  r|j                  |d          ||z  }6|j                  |       |}|}L |j                  |       |S |S )NTr	   r   )r   r   r   )	r$   r*   r)   r,   	new_partsnew_partr   partnext_nstripess	            r   r!   zArrowORCEngine._aggregate_filesQ   s    d"s='9A'=#e*q.IQxH8A;q>*Hab	 - #DGAJ 8+}<OODG,-H$$X.#H,H- X&Lr   c           	         g }|D ]  \  }}|t        |||||      z  } t        j                  j                  |      j	                  d      S )NF)date_as_object)_read_orc_stripespaTablefrom_batches	to_pandas)	r$   r%   r,   r   r'   r+   batchesr.   stripess	            r   read_partitionzArrowORCEngine.read_partitione   sW    " 	MMD'(T7FGLLG	Mxx$$W-77u7MMr   c                    t         j                  j                  |      }|j                  |j                  j                  ||g      d      5 }t        j                  ||       d d d        y # 1 sw Y   y xY w)Nwb)r=   r>   from_pandasr   sepjoinr   write_table)r$   dfr.   r%   filenamer+   tabler/   s           r   write_partitionzArrowORCEngine.write_partitionl   s[    $$R(WWRVV[[$!12D9 	&QOOE1%	& 	& 	&s   A--A6N)__name__
__module____qualname__classmethodr4   r!   rC   rM    r   r   r   r   	   sU    D# D#L  & N N & &r   r   c                "   |t        |      }g }| j                  |d      5 }t        j                  |      }|t	        |j
                        n|}|D ]#  }	|j                  |j                  |	|             % 	 d d d        |S # 1 sw Y   |S xY w)Nr   )r   r   r   r   r   r   r   read_stripe)
r%   r.   rB   r   r'   rA   r/   r0   r1   stripes
             r   r<   r<   s   s     v,G	t	 ;KKN(/5$W 	;FNN1==9:	;;
 N;
 Ns   ABB)
__future__r   pyarrowr=   pyarrow.orcr   dask.dataframe.io.utilsr   r   r   r<   rR   r   r   <module>rZ      s#    "   Jg& g&Tr   