
    bif7                        d dl Z d dlmZmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZ d dlZd dlZddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ erd dlZd dlmZ ddl m!Z!  G d de      Z"e G d d             Z#ddde$fdZ%ddde"fdZ&dejN                  de"fdZ(	 	 	 	 	 	 d$de)dee*e)ee+e)f   f      de)de
d   de,deee)d f      d!e
d"   ddfd#Z-y)%    N)	dataclassfield)Path)TYPE_CHECKINGAnyClassVarLiteralOptional	TypedDictUnion   )config)DownloadConfig)
array_cast)is_local_pathxopen)string_to_dictVideoDecoder   )FeatureTypec                   .    e Zd ZU ee   ed<   ee   ed<   y)ExamplepathbytesN)__name__
__module____qualname__r
   str__annotations__r        R/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/features/video.pyr   r      s    
3-E?r"   r   c            
       Z   e Zd ZU dZdZeed<   dZee	   ed<   dZ
ed   ed<   d	Ze	ed
<   dZeeedf      ed<   dZed   ed<    edd      Zee   ed<   dZee   ed<    ej,                   ej.                          ej0                         d      Zee   ed<    ed dd      Zeed<   d Zdeeeeee jB                  df   defdZ"	 d%deeef   dee#eeeef   f      ddfd Z$ded!e#ed!f   f   fd"Z%d#eejL                  ejN                  ejP                  f   dejN                  fd$Z)y)&Videou=  
    Video [`Feature`] to read video data from a video file.

    Input: The Video feature accepts as input:
    - A `str`: Absolute path to the video file (i.e. random access is allowed).
    - A `pathlib.Path`: path to the video file (i.e. random access is allowed).
    - A `dict` with the keys:

        - `path`: String with relative path of the video file in a dataset repository.
        - `bytes`: Bytes of the video file.

      This is useful for parquet or webdataset files which embed video files.

    - A `torchcodec.decoders.VideoDecoder`: torchcodec video decoder object.

    Output: The Video features output data as `torchcodec.decoders.VideoDecoder` objects.

    Args:
        mode (`str`, *optional*):
            The mode to convert the video to. If `None`, the native mode of the video is used.
        decode (`bool`, defaults to `True`):
            Whether to decode the video data. If `False`,
            returns the underlying dictionary in the format `{"path": video_path, "bytes": video_bytes}`.
        stream_index (`int`, *optional*):
            The streaming index to use from the file. If `None` defaults to the "best" index.
        dimension_order (`str`, defaults to `NCHW`):
            The dimension order of the decoded frames.
            where N is the batch size, C is the number of channels,
            H is the height, and W is the width of the frames.
        num_ffmpeg_threads (`int`, defaults to `1`):
            The number of threads to use for decoding the video. (Recommended to keep this at 1)
        device (`str` or `torch.device`, defaults to `cpu`):
            The device to use for decoding the video.
        seek_mode (`str`, defaults to `exact`):
            Determines if frame access will be “exact” or “approximate”.
            Exact guarantees that requesting frame i will always return frame i, but doing so requires an initial scan of the file.
            Approximate is faster as it avoids scanning the file, but less accurate as it uses the file's metadata to calculate where i probably is.
            read more [here](https://docs.pytorch.org/torchcodec/stable/generated_examples/approximate_mode.html#sphx-glr-generated-examples-approximate-mode-py)

    Examples:

    ```py
    >>> from datasets import Dataset, Video
    >>> ds = Dataset.from_dict({"video":["path/to/Screen Recording.mov"]}).cast_column("video", Video())
    >>> ds.features["video"]
    Video(decode=True, id=None)
    >>> ds[0]["video"]
    <torchcodec.decoders._video_decoder.VideoDecoder object at 0x14a61e080>
    >>> video = ds[0]["video"]
    >>> video.get_frames_in_range(0, 10)
    FrameBatch:
    data (shape): torch.Size([10, 3, 50, 66])
    pts_seconds: tensor([0.4333, 0.4333, 0.4333, 0.4333, 0.4333, 0.4333, 0.4333, 0.4333, 0.4333,
            0.4333], dtype=torch.float64)
    duration_seconds: tensor([0.0167, 0.0167, 0.0167, 0.0167, 0.0167, 0.0167, 0.0167, 0.0167, 0.0167,
            0.0167], dtype=torch.float64)
    >>> ds.cast_column('video', Video(decode=False))[0]["video]
    {'bytes': None,
     'path': 'path/to/Screen Recording.mov'}
    ```
    TdecodeNstream_indexNCHWr(   NHWCdimension_orderr   num_ffmpeg_threadscputorch.devicedeviceexactr0   approximate	seek_modeF)defaultrepridz torchcodec.decoders.VideoDecoderdtyper   r   pa_type)r4   initr5   _typec                     | j                   S N)r9   )selfs    r#   __call__zVideo.__call__h   s    ||r"   valuer   returnc                    |t        d      t        j                  rddlm} nd}t        |t              rt        j                  |      }t        |t              r|ddS t        |t              rt        |j                               ddS t        |t        t        f      rd|dS t        |t        j                        rt        |      S |t        ||      rt!        |      S t        |t"              r`|j%                  d      |j%                  d      }}|$t&        j(                  j+                  |      rd|dS ||||dS t        d	| d
      t-        dt/        |             )a  Encode example into a format for Arrow.

        Args:
            value (`str`, `np.ndarray`, `bytes`, `bytearray`, `VideoDecoder` or `dict`):
                Data passed as input to Video feature.

        Returns:
            `dict` with "path" and "bytes" fields
        Nzvalue must be providedr   r   r   r   r   r   r8   zTA video sample should have one of 'path' or 'bytes' but they are missing or None in .z!Unsupported encode_example type: )
ValueErrorr   TORCHCODEC_AVAILABLEtorchcodec.decodersr   
isinstancelistnparrayr   r   absoluter   	bytearrayndarrayencode_np_arrayencode_torchcodec_videodictgetosr   isfile	TypeErrortype)r>   r@   r   r   bytes_s        r#   encode_examplezVideo.encode_examplek   sP    =566&&8LeT"HHUOEeS!!D11t$ 01DAAy12 511rzz*"5))%*UL*I*511t$ 99V,eii.@&DBGGNN4$8!%t44#t'7!'66 jkpjqqrs  ?U}MNNr"   token_per_repo_idc                    | j                   st        d      t        j                  rddlm} nt        d      |i }t        |t              r|d}}n
|d   |d   }}||t        d| d	      t        |      rA ||| j                  | j                  | j                  | j                  | j                  
      }nzt!        ||| j                  | j                  | j                  | j                        }n@ ||| j                  | j                  | j                  | j                  | j                  
      }||d|_        ||j$                  _        |S )a  Decode example video file into video data.

        Args:
            value (`str` or `dict`):
                A string with the absolute video file path, a dictionary with
                keys:

                - `path`: String with absolute or relative video file path.
                - `bytes`: The bytes of the video file.
            token_per_repo_id (`dict`, *optional*):
                To access and decode
                video files from private repositories on the Hub, you can pass
                a dictionary repo_id (`str`) -> token (`bool` or `str`).

        Returns:
            `torchcodec.decoders.VideoDecoder`
        zMDecoding is disabled for this feature. Please use Video(decode=True) instead.r   r   z8To support decoding videos, please install 'torchcodec'.Nr   r   zBA video should have one of 'path' or 'bytes' but both are None in rD   r'   r+   r,   r/   r3   )rY   r+   r,   r/   r3   rC   )r&   RuntimeErrorr   rF   rG   r   ImportErrorrH   r   rE   r   r'   r+   r,   r/   r3   hf_video_reader_hf_encodedmetadatar   )r>   r@   rY   r   r   rW   videos          r#   decode_examplezVideo.decode_example   sK   , {{noo&&8 XYY$ "eS! $&D =%.&D>| #efkellm!noot$$!%!2!2$($8$8'+'>'>;;"nn (&7$($8$8'+'>'>;;"nn !!.. $ 4 4#'#:#:{{..E &*F;"r"   r   c                 L    ddl m} | j                  r| S  |d       |d      dS )zfIf in the decodable state, return the feature itself, otherwise flatten the feature into a dictionary.r   )Valuebinarystringr8   )featuresrd   r&   )r>   rd   s     r#   flattenzVideo.flatten   s2    # {{ 	
 xh	
r"   storagec           
         t         j                  j                  |j                        rlt        j                  dgt        |      z  t        j                               }t         j                  j                  ||gddg|j                               }nt         j                  j                  |j                        rlt        j                  dgt        |      z  t        j                               }t         j                  j                  ||gddg|j                               }n&t         j                  j                  |j                        r|j                  j                  d      dk\  r|j                  d      }n6t        j                  dgt        |      z  t        j                               }|j                  j                  d      dk\  r|j                  d      }n6t        j                  dgt        |      z  t        j                               }t         j                  j                  ||gddg|j                               }nt         j                  j                  |j                        rt        j                  |j!                         D cg c]'  }|!t#        t%        j                  |            d   nd) c}t        j                               }t        j                  dgt        |      z  t        j                               }t         j                  j                  ||gddg|j                               }t'        || j(                        S c c}w )a'  Cast an Arrow array to the Video arrow storage type.
        The Arrow types that can be converted to the Video pyarrow storage type are:

        - `pa.string()` - it must contain the "path" data
        - `pa.binary()` - it must contain the video bytes
        - `pa.struct({"bytes": pa.binary()})`
        - `pa.struct({"path": pa.string()})`
        - `pa.struct({"bytes": pa.binary(), "path": pa.string()})`  - order doesn't matter
        - `pa.list(*)` - it must contain the video array data

        Args:
            storage (`Union[pa.StringArray, pa.StructArray, pa.ListArray]`):
                PyArrow array to cast.

        Returns:
            `pa.StructArray`: Array in the Video arrow storage type, that is
                `pa.struct({"bytes": pa.binary(), "path": pa.string()})`.
        N)rV   r   r   )maskr   )patypes	is_stringrV   rK   lenre   StructArrayfrom_arraysis_null	is_binaryrf   	is_structget_field_indexr   is_list	to_pylistrO   rJ   r   r9   )r>   ri   bytes_array
path_arrayarrs        r#   cast_storagezVideo.cast_storage   st   & 88gll+((D6CL#8ryy{KKnn00+w1G'SYIZahapapar0sGXX-4&3w<"7biikJJnn00':1FRXHY`g`o`o`q0rGXX-||++G49%mmG4 hhvG'<299;O||++F3q8$]]62
XXtfs7|&;"))+N
nn00+z1JWV\L]dkdsdsdu0vGXXgll+((ahararatuZ]CO#/8QUUuYY[K 4&3w<"7biikJJnn00j)GV+<;CVCVCX 1 G '4<<00 vs    ,M'r=   )*r   r   r   __doc__r&   boolr    r'   r
   intr+   r	   r,   r/   r   r   r3   r   r6   r7   r   rl   structre   rf   r9   r   r;   r?   r   rM   r   rJ   rN   rX   rQ   rb   rh   StringArrayrp   	ListArrayr{   r!   r"   r#   r%   r%      s   <| FD"&L(3-&/5OW^,538FHU3./0818Iw-.8d7B7=E8C==&RYYibiik'RSGXc]SwU?E3?.OE#ui"**Vd*d$e .Ojq .Of DHGS'\"G $DeD#I.>)>$?@G 
	GR
}d33E.FFG 
,1E".."..",,*V$W ,1\^\j\j ,1r"   r%   ra   r   rA   c                     t               )zOConvert a torchcodec Video object to bytes using native compression if possibleNotImplementedErrorra   s    r#   video_to_bytesr      s    

r"   c                 H    t        | d      r| j                  S t        d      )Nr_   zYEncoding a VideoDecoder that doesn't come from datasets.Video.decode() is not implemented)hasattrr_   r   r   s    r#   rP   rP   %  s(    um$   !g
 	
r"   rK   c                     t               r=   r   )rK   s    r#   rO   rO   .  s    

r"   r   rY   streamr+   r)   r,   r/   r.   r3   r1   c                    ddl m} |i }| j                  d      d   }|j                  t        j
                        rt        j                  nt        j                  }	t        ||	      }
|
|j                  |
d         nd }t        |      }t        | d|      }t        |j                  d	            d
k(  rdnt        |j                  d	      d
         } |||||||      }|S )Nr   r   z::repo_id)tokenrb)download_config:r   r[   )rG   r   split
startswithr   HF_ENDPOINTHUB_DATASETS_URLHUB_DATASETS_HFFS_URLr   rR   r   r   ro   r~   )r   rY   r   r+   r,   r/   r3   r   
source_urlpatternsource_url_fieldsr   r   f	stream_idvds                   r#   r^   r^   7  s     1  D!"%J)3)>)>v?Q?Q)Rf%%X^XtXtG&z7;CTC`!!"3I">?fjE$51OdD/:A c*+q0c&,,s:KA:N6OI		'-
B Ir"   )Nra   r(   r   r-   r0   ).rS   dataclassesr   r   pathlibr   typingr   r   r   r	   r
   r   r   numpyrJ   pyarrowrl    r   download.download_configr   tabler   utils.file_utilsr   r   utils.py_utilsr   torchrG   r   rg   r   r   r%   r   r   rP   rN   rO   r   rQ   r}   r~   r^   r!   r"   r#   <module>r      s9   	 (  T T T    5  3 + 0%i 
 @1 @1 @1F .  U  

> 
g 
 2::  '   @D/53818
S%c	*:%: ;<  ^,	
  U3./0 -. r"   