
    bi2                        d dl mZ d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZmZ d d	lmZ erd dlZd d
lmZ d dlmZ  G d d      ZddZy)    )annotationsN)partial)perf_counter)TYPE_CHECKINGAnyLiteral)eprintverbose)ComputeError))IdentityTransformedPartitionValuesBuilder_scan_pyarrow_dataset_impl)ScanCastOptions)Table)	LazyFramec                      e Zd ZdZdddd	 	 	 	 	 	 	 	 	 ddZddZddZdddd	 	 	 	 	 	 	 ddZdd	Zdd
Z	ddZ
ddZy)IcebergDatasetz Dataset interface for PyIceberg.Nsnapshot_idiceberg_storage_propertiesreader_overridec                   d | _         d | _        || _        || _        || _        t        |t              r|| _         y || _        y )N)_metadata_path_table_snapshot_id_iceberg_storage_properties_reader_override
isinstancestr)selfsourcer   r   r   s        T/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/polars/io/iceberg/dataset.py__init__zIcebergDataset.__init__   sF     #'+E(GV
 fc""(D DK    c                "    | j                         S )zFetch the schema of the table.)arrow_schema)r   s    r!   schemazIcebergDataset.schema5   s      ""r#   c                V    ddl m}  || j                         j                               S )z$Fetch the arrow schema of the table.r   schema_to_pyarrow)pyiceberg.io.pyarrowr)   tabler&   )r   r)   s     r!   r%   zIcebergDataset.arrow_schema9   s    : !4!4!677r#   )existing_resolved_version_keylimit
projectionc               F	   ddl m} ddl}|j                  j                  j                         }|rt        d| j                   d| d|        | j                         }|r"t        d|j                  j                          |dn
t        |      }| j                  }	d6d	}
| j                  xs t        j                  d
      }|r|dvrd| d}t        |      |dk(  rdn|j                   dk  sd|j                    nd}d}|	[|j#                  |	      }| |
|	      |j$                  }|d|	 d}t        |      |j'                         |   }|j(                   }nG|j+                         }|j                  j,                  }|j/                         x}|j(                   nd}|||k(  r|rt        d|d       y|dk(  r|n |j0                  | }g }t3        ||      }i }|dk7  r|sddlm}m} |rt        d       t;               }|j=                  |	||      }d}t?        |jA                               D ]C  \  }}|jB                  jD                  |jF                  k7  rd|jB                  jD                   } n|jH                  rg ||<   |jH                  D ]y  }|jJ                  |jL                  k7  rd|jJ                   } nO|jD                  |jF                  k7  rd|jD                   } n%||   jO                  |jP                         |dz  }{ |r nd|jS                  ||jB                  jT                  |jB                  jV                         |jO                  |jB                  jP                         F |rt;               |z
  }t        d|d d!       |s|rAtY        |      dk(  rdnd"} dk(  rdnd"}!t        d#tY        |       d$|  d%|	 d&| d'| d(|!        dd)l-m.}"  |"|t_        j`                         d*d+d, ||      fd-|jc                         fd.|f/      |fS |d0k(  rd1| }te        |      |rt        d2|        tg        th        ||	||3      }# ||j+                               }$tj        jl                  jo                  |$|#d4d45      }%|%|fS )7zConstruct a LazyFrame scan.r   r(   Nz0IcebergDataset: to_dataset_scan(): snapshot ID: z	, limit: z, projection: zEIcebergDataset: to_dataset_scan(): tbl.metadata.current_snapshot_id: )*c                    t        d|        S )Nziceberg snapshot ID not found: )
ValueError)r   s    r!   snapshot_id_not_foundz=IcebergDataset.to_dataset_scan.<locals>.snapshot_id_not_founda   s     ?}MNNr#   POLARS_ICEBERG_READER_OVERRIDE)native	pyicebergz-iceberg: unknown value for reader_override: 'z*', expected one of ('native', 'pyiceberg')r6   z"forced reader_override='pyiceberg'   z"unsupported table format version: z#IcebergDataset: requested snapshot z did not contain a schema ID zCIcebergDataset: to_dataset_scan(): early return (snapshot_id_key = ))DataFileContent
FileFormatz7IcebergDataset: to_dataset_scan(): begin path expansion)r   r-   selected_fieldsznon-parquet format: z unsupported deletion file type: z"unsupported deletion file format:    )current_indexpartition_spec_idpartition_valuesz:IcebergDataset: to_dataset_scan(): finish path expansion (z.3fzs)sz:IcebergDataset: to_dataset_scan(): native scan_parquet(): z sourcez, snapshot ID: z, schema ID: z, z deletion file)scan_parquetinsertignoreziceberg-column-mappingicebergziceberg-position-delete)cast_optionsmissing_columnsextra_columns_column_mapping_default_values_deletion_filesr5   z)iceberg reader_override='native' failed: zGIcebergDataset: to_dataset_scan(): fallback to python[pyiceberg] scan: )r   n_rowswith_columnsT)pyarrowis_pure)r   r   returnr2   )8r*   r)   polars._utils.logging_utilsloggingr
   r	   r   r+   metadatacurrent_snapshot_idtupler   osgetenvr2   format_versionsnapshot_by_id	schema_idschemasr   r&   current_schema_idcurrent_snapshotselectr   pyiceberg.manifestr:   r;   r   scan	enumerate
plan_filesfilefile_formatPARQUETdelete_filescontentPOSITION_DELETESappend	file_pathpush_partition_valuesspec_id	partitionlenpolars.io.parquet.functionsrB   r   _default_icebergfinishr   r   r   plr   _scan_python_function)&r   r,   r-   r.   r)   polarsr
   tblr<   r   r3   r   msgfallback_reasonr[   snapshoticeberg_schemasnapshot_id_keyvprojected_iceberg_schemasourcesmissing_field_defaultsdeletion_filesr:   r;   
start_timera   total_deletion_filesi	file_infodeletion_fileelapsedrA   s2rB   funcr%   lfs&                                         r!   to_dataset_scanzIcebergDataset.to_dataset_scan?   s    	;$--''//1  $ 1 12 3 !)l, jjl558\\5U5U4VX
 %/$6&E*<M''	O // 
299,4
 6MM#$$NP  S/! +- 1 %%* 6c6H6H5IJ 	 	"))+6H+K88 **I 9+ G2 2  !o% [[]95N!)!5!5 6O ZZ\N66I -0,@,@,B'Bq&O1==/UW 
 *5-@+'+1.
  &( &&&8 	! !J$"
 02k)/FPQ%J88' /  D $%  )$//*; < %99>>--1C1CC.y~~/I/I.JK $ ))(*N1%)2)?)? 2(00O4T4TT B#0#8#8"9!; , "(44
8J8JJ D#0#<#<"=!? , "&q)001H1HI,1,!2$ #&<<"#&/nn&<&<%.^^%=%= =  y~~778K%9N &.:5..5c]">
 g,!+B/14R#.7|nGA3 /$$/= 1""+B+,N2$@ A,==? (&, &n5	! "+,B,I,I,K L!:N K   (=o=NOCs##77F6GI
 &##
 )6\\//	 0 
 ?""r#   c                    | j                   8| j                  d}t        |      | j                         j                  | _         | j                   S )zFetch the metadata path.1impl error: both metadata_path and table are None)r   r   r2   r+   metadata_location)r   rw   s     r!   metadata_pathzIcebergDataset.metadata_path*  sG    &{{"I o%"&**,"@"@D"""r#   c                   | j                   q| j                  d}t        |      t               rt	        d| j                         ddlm} |j                  | j                  | j                  xs i       | _         | j                   S )z!Fetch the PyIceberg Table object.r   z;IcebergDataset: construct table from self._metadata_path = r   )StaticTable)r   
properties)	r   r   r2   r
   r	   pyiceberg.tabler   from_metadatar   )r   rw   r   s      r!   r+   zIcebergDataset.table5  s    ;;""*I o%yUt?R?R>VWX3%33"&"5"5;;Ar 4 DK
 {{r#   c           
         | j                         | j                  | j                  | j                  d}t	               r>|d   }|d   x}d| dnd }t        |d         }|d   }t        d| d| d	| d
|        |S )N)r   r   r   r   r   r   'r   r   #IcebergDataset: getstate(): path: 'z', snapshot_id: z, iceberg_storage_properties: , reader_override: )r   r   r   r   r
   _redact_dict_valuesr	   )r   state	path_reprr|   r   	keys_reprr   s          r!   __getstate__zIcebergDataset.__getstate__O  s    !//1,,*.*J*J#44	
 9o.I,1-,@'@q&MAaS(SWK+E2N,OPI#$56O# %  +} -//8k :$$3#4	6 r#   c           
         t               r4|d   }|d   }t        |d         }|d   }t        d| d| d| d|        t        j	                  | |d   |d   |d   |d   	       y )
Nr   r   r   r   r   z', snapshot_id: 'z', iceberg_storage_properties: r   r   )r
   r   r	   r   r"   )r   r   r   r   r   r   s         r!   __setstate__zIcebergDataset.__setstate__g  s    9o.I.K+E2N,OPI#$56O# %!!, .//8k :$$3#4	6 	/"m,',-I'J!"34 	  	
r#   )
r    zstr | Tabler   
int | Noner   zdict[str, Any] | Noner   z%Literal['native', 'pyiceberg'] | NonerP   None)rP   z	pa.schema)r,   z
str | Noner-   r   r.   zlist[str] | NonerP   ztuple[LazyFrame, str] | None)rP   r   )rP   r   )rP   dict[str, Any])r   r   rP   r   )__name__
__module____qualname____doc__r"   r&   r%   r   r   r+   r   r    r#   r!   r   r      s    * #'<@AE!!  	!
 %:! ?! 
!4#8 59 '+e# (2e# 	e#
 %e# 
&e#V	#40
r#   r   c                    t        | t              r| j                         D ci c]  }|d c}S | dt        |       j                   dS dS c c}w )NREDACTED<z object>r   )r   dictkeystyper   )objks     r!   r   r     s_     c4  !$
+1J+ ? c##$H-
 +s   
A)r   r   rP   r   ) 
__future__r   rW   	functoolsr   timer   typingr   r   r   polars._reexport	_reexportrs   rQ   r	   r
   polars.exceptionsr   polars.io.iceberg._utilsr   r   #polars.io.scan_options.cast_optionsr   rN   par   r   polars.lazyframe.framer   r   r   r   r#   r!   <module>r      sI    " 	   . .  1 * @%0d
 d
Nr#   