
    bit                         d dl Z d dlmZ d dlmZ d dlZd dlZddl	m
Z
 ddlmZ ddlmZ erd dlZ G d	 d
eedef         Zy)    N)Mapping)TYPE_CHECKING   )config)
map_nested   )TensorFormatterc                        e Zd Zd fd	Zd Zd Zd ZdefdZde	j                  defd	Zde	j                  dd
fdZde	j                  defdZ xZS )TFFormatterc                 >    t         |   ||       || _        dd l}y )N)featurestoken_per_repo_idr   )super__init__tf_tensor_kwargs
tensorflow)selfr   r   r   tf	__class__s        [/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/formatting/tf_formatter.pyr   zTFFormatter.__init__!   s!    (>OP 0    c                     dd l t        t              rYrWt        fdD              r j                        S t        fdD              rj
                  j	                        S S )Nr   c              3      K   | ]V  }t        |j                        xr: |j                  d    j                  k(  xr |j                  d    j                  k(   X yw)r   N)
isinstanceTensorshapedtype.0xcolumnr   s     r   	<genexpr>z+TFFormatter._consolidate.<locals>.<genexpr>*   sU      kl
1bii(fQWWq	-GfAGGW]^_W`WfWfLffs   AAc              3      K   | ]U  }t        |j                  j                  f      xr- |j                  d k(  xr |j                  d   j                  k(   W yw)r   r   N)r   r   RaggedTensorndimr   r   s     r   r"   z+TFFormatter._consolidate.<locals>.<genexpr>.   s[       1ryy"//:;j!jPQPWPW[abc[d[j[jPjjs   AA)r   r   listallstackragged)r   r!   r   s    `@r   _consolidatezTFFormatter._consolidate&   si    fd# pv   rxx''  
 yyv..r   c                    dd l }||S i }t        |t        j                  t        j                  f      r=t        j
                  |j                  t        j                        rd|j                  i}nft        |t        j                  t        j                  f      r<t        j
                  |j                  t        j                        rd|j                  i}t        j                  rKdt        j                  v r9dd l}t        ||j                   j                         rt        j"                  |      }t        j$                  r&dt        j                  v rddlm} t        ||      r|S t        j*                  r*dt        j                  v rddlm}m} t        |||f      r|S  |j2                  |fi i || j4                  S )Nr   r   PILtorchvision)VideoReader
torchcodec)AudioDecoderVideoDecoder)r   r   npnumberndarray
issubdtyper   integerint64floatingfloat32r   PIL_AVAILABLEsysmodules	PIL.ImageImageasarrayTORCHVISION_AVAILABLEtorchvision.ior.   TORCHCODEC_AVAILABLEtorchcodec.decodersr0   r1   convert_to_tensorr   )r   valuer   default_dtyper,   r.   r0   r1   s           r   
_tensorizezTFFormatter._tensorize7   s?   =Lebii45"--UWU_U_:`$bhh/M		2::67BMM%++WYWbWb<c$bjj1MES[[$8%1

5)''MS[[,H2%-&&<3;;+FF%,!=>#r##EX-W-WAVAV-WXXr   c                    dd l }t        j                  rjdt        j                  v rXdd l}t        ||j                        r>| j                  |j                         j                         j                         d         S t        |d      r&t        ||j                        s|j                         }t        |t        j                        rA|j                   t"        k(  rr| j%                  |D cg c]  }| j'                  |       c}      S t        |t(        t*        f      r.| j%                  |D cg c]  }| j'                  |       c}      S | j                  |      S c c}w c c}w )Nr   torch 	__array__)r   r   TORCH_AVAILABLEr;   r<   rI   r   r   rG   detachcpunumpyhasattrrK   r2   r4   r   objectr*   recursive_tensorizer&   tuple)r   data_structr   rI   	substructs        r   _recursive_tensorizez TFFormatter._recursive_tensorizeV   s    !!g&<+u||4{'9'9';'?'?'A'G'G'I"'MNN;,ZRYY5W%//1Kk2::.  F*((_j)kR[$*B*B9*M)klldE]3$$[f%gid&>&>y&I%ghh{++ *l%gs   -E!1E&rT   c                 2    t        | j                  |d      S )NF)map_list)r   rV   )r   rT   s     r   rR   zTFFormatter.recursive_tensorizei   s    $33[5QQr   pa_tablereturnc                     | j                         j                  |      }| j                  j                  |      }| j	                  |      S N)numpy_arrow_extractorextract_rowpython_features_decoder
decode_rowrR   )r   rY   rows      r   
format_rowzTFFormatter.format_rowl   sB    ((*66x@**55c:'',,r   	tf.Tensorc                     | j                         j                  |      }| j                  j                  ||j                  d         }| j                  |      }| j                  |      }|S )Nr   )r]   extract_columnr_   decode_columncolumn_namesrR   r*   )r   rY   r!   s      r   format_columnzTFFormatter.format_columnq   sd    ++-<<XF--;;FHDYDYZ[D\]))&1""6*r   c                     | j                         j                  |      }| j                  j                  |      }| j	                  |      }|D ]  }| j                  ||         ||<    |S r\   )r]   extract_batchr_   decode_batchrR   r*   )r   rY   batchcolumn_names       r   format_batchzTFFormatter.format_batchx   sq    **,::8D,,99%@((/  	GK!%!2!253E!FE+	Gr   )NN)__name__
__module____qualname__r   r*   rG   rV   dictrR   paTabler   rb   rh   rn   __classcell__)r   s   @r   r   r       sk     
"Y>,&Rt R-288 - -
bhh ; RXX ' r   r   rc   )r;   collections.abcr   typingr   rO   r2   pyarrowrs    r   utils.py_utilsr   
formattingr	   r   r   r   rJ   r   r   <module>r|      sA      #      ' ' ^/';"?@ ^r   