
    bi                         d dl ZddlmZmZ ddlmZ 	 d dlZdZdZ	d d
lmZ  G d dee      Zy# e
$ rZdZde d	Z	Y dZ[%dZ[ww xY w)    N   )ConfigMixinregister_to_config)SchedulerMixinT FzCannot import librosa because zB. Make sure to correctly install librosa to be able to install it.)Imagec                   "   e Zd ZdZdZe	 	 	 	 	 	 	 ddedededededed	efd
       ZdedefdZdde	de
j                  fdZdefdZddede
j                  fdZdefdZdedej                   fdZdej                   de
j                  fdZy)Mela+  
    Parameters:
        x_res (`int`):
            x resolution of spectrogram (time).
        y_res (`int`):
            y resolution of spectrogram (frequency bins).
        sample_rate (`int`):
            Sample rate of audio.
        n_fft (`int`):
            Number of Fast Fourier Transforms.
        hop_length (`int`):
            Hop length (a higher number is recommended if `y_res` < 256).
        top_db (`int`):
            Loudest decibel value.
        n_iter (`int`):
            Number of iterations for Griffin-Lim Mel inversion.
    zmel_config.jsonx_resy_ressample_raten_fft
hop_lengthtop_dbn_iterc                     || _         || _        || _        || _        || _        | j                  ||       d | _        t        st        t              y )N)
r   srr   r   r   set_resolutionaudio_librosa_can_be_imported
ValueError_import_error)selfr   r   r   r   r   r   r   s           m/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/pipelines/deprecated/audio_diffusion/mel.py__init__zMel.__init__:   sQ     %
E5)
']++ (    c                     || _         || _        | j                  | _        | j                   | j                  z  dz
  | _        y)zSet resolution.

        Args:
            x_res (`int`):
                x resolution of spectrogram (time).
            y_res (`int`):
                y resolution of spectrogram (frequency bins).
           N)r   r   n_melsr   
slice_size)r   r   r   s      r   r   zMel.set_resolutionP   s6     

jj**t6:r   N
audio_file	raw_audioc           	         |+t        j                  |d| j                        \  | _        }n|| _        t	        | j                        | j
                  | j                  z  k  rht        j                  | j                  t        j                  | j
                  | j                  z  t	        | j                        z
  f      g      | _        yy)a  Load audio.

        Args:
            audio_file (`str`):
                An audio file that must be on disk due to [Librosa](https://librosa.org/) limitation.
            raw_audio (`np.ndarray`):
                The raw audio file as a NumPy array.
        NT)monor   )
librosaloadr   r   lenr   r   npconcatenatezeros)r   r!   r"   _s       r   
load_audiozMel.load_audio^   s     !#LL$477KMDJ"DJ tzz?TZZ$//99RXXtzzDOO?[^abfblbl^m?m>o5p(qrDJ :r   returnc                 F    t        | j                        | j                  z  S )zGet number of slices in audio.

        Returns:
            `int`:
                Number of spectograms audio can be sliced into.
        )r'   r   r    r   s    r   get_number_of_sliceszMel.get_number_of_slicesp   s     4::$//11r   slicec                 Z    | j                   | j                  |z  | j                  |dz   z   S )zGet slice of audio.

        Args:
            slice (`int`):
                Slice number of audio (out of `get_number_of_slices()`).

        Returns:
            `np.ndarray`:
                The audio slice as a NumPy array.
        r   )r   r    )r   r1   s     r   get_audio_slicezMel.get_audio_slicey   s+     zz$//E1DOOuqy4QRRr   c                     | j                   S )zdGet sample rate.

        Returns:
            `int`:
                Sample rate of audio.
        )r   r/   s    r   get_sample_ratezMel.get_sample_rate   s     wwr   c                    t         j                  j                  | j                  |      | j                  | j
                  | j                  | j                        }t        j                  |t        j                  | j                        }|| j                  z   dz  | j                  z  j                  dd      dz   j                  t        j                        }t        j                   |      }|S )a  Convert slice of audio to spectrogram.

        Args:
            slice (`int`):
                Slice number of audio to convert (out of `get_number_of_slices()`).

        Returns:
            `PIL Image`:
                A grayscale image of `x_res x y_res`.
        )yr   r   r   r   )refr      r   g      ?)r%   featuremelspectrogramr3   r   r   r   r   power_to_dbr(   maxr   clipastypeuint8r   	fromarray)r   r1   Slog_Sbytedataimages         r   audio_slice_to_imagezMel.audio_slice_to_image   s     OO**""5)dggTZZTXTcTclplwlw + 
 ##A266$++Fdkk)S04;;>DDQLsRZZ[][c[cd)r   rE   c                    t        j                  |j                         d      j                  |j                  |j
                  f      }|j                  d      | j                  z  dz  | j                  z
  }t        j                  |      }t        j                  j                  j                  || j                  | j                  | j                  | j                         }|S )zConverts spectrogram to audio.

        Args:
            image (`PIL Image`):
                An grayscale image of `x_res x y_res`.

        Returns:
            audio (`np.ndarray`):
                The audio as a NumPy array.
        r@   )dtypefloatr9   )r   r   r   r   )r(   
frombuffertobytesreshapeheightwidthr?   r   r%   db_to_powerr:   inversemel_to_audior   r   r   r   )r   rE   rD   rC   rB   r   s         r   image_to_audiozMel.image_to_audio   s     ==@HH%,,X]XcXcIde(4;;6<t{{J&''44$''PTP[P[ 5 
 r   )   rS   i"V  i   i   P       )NN)r   )__name__
__module____qualname____doc__config_namer   intr   r   strr(   ndarrayr,   r0   r3   r5   r   rF   rR    r   r   r
   r
   %   s   $ $K  ,, , 	,
 , , , , ,*;C ; ;sS sBJJ s$2c 2SS S S # %++ &EKK BJJ r   r
   )numpyr(   configuration_utilsr   r   schedulers.scheduling_utilsr   r%   r   r   	ExceptionePILr   r
   r^   r   r   <module>re      sb      C :	#M N+~ N  $
(+mn s   / AAA