
from dataclasses import dataclass
from typing import Optional, Tuple, Union

import numpy as np
import torch
import torch.nn.functional as F

from ..configuration_utils import ConfigMixin, register_to_config
from ..utils import BaseOutput
from .scheduling_utils import SchedulerMixin


@dataclass
class VQDiffusionSchedulerOutput(BaseOutput):
    """
    Output class for the scheduler's step function output.

    Args:
        prev_sample (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
            Computed sample x_{t-1} of previous timestep. `prev_sample` should be used as next model input in the
            denoising loop.
    """

    prev_sample: torch.LongTensor


def index_to_log_onehot(x: torch.LongTensor, num_classes: int) -> torch.Tensor:
    """
    Convert a batch of vectors of class indices into a batch of log onehot vectors.
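
    For example (shapes only; the tensor below is illustrative, not taken from the library):

    ```
    x = torch.tensor([[0, 2, 1]])      # (batch size 1, vector length 3)
    log_x = index_to_log_onehot(x, 4)  # (1, 4, 3): ~0.0 at each pixel's class, ~-69 elsewhere
    ```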

    Args:
        x (`torch.LongTensor` of shape `(batch size, vector length)`):
            Batch of class indices

        num_classes (`int`):
            The number of classes to be used for the onehot vectors.

    Returns:
        `torch.Tensor` of shape `(batch size, num classes, vector length)`:
            Log onehot vectors
    """
    x_onehot = F.one_hot(x, num_classes)

    # (batch, length, classes) -> (batch, classes, length)
    x_onehot = x_onehot.permute(0, 2, 1)

    # Clamp before the log so zero entries become a large negative number instead of -inf.
    log_x = torch.log(x_onehot.float().clamp(min=1e-30))
    return log_x


def gumbel_noised(logits: torch.Tensor, generator: Optional[torch.Generator]) -> torch.Tensor:
    """
    Apply gumbel noise to `logits`
    """
    uniform = torch.rand(logits.shape, device=logits.device, generator=generator)
    gumbel_noise = -torch.log(-torch.log(uniform + 1e-30) + 1e-30)
    noised = gumbel_noise + logits
    return noised


def alpha_schedules(num_diffusion_timesteps: int, alpha_cum_start=0.99999, alpha_cum_end=0.000009):
    """
    Cumulative and non-cumulative alpha schedules.
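
    The cumulative alpha schedule is a linear interpolation from `alpha_cum_start` to `alpha_cum_end`; the
    per-step values are recovered as ratios of consecutive cumulative values. Schematically (with
    `T = num_diffusion_timesteps` and `att[-1]` taken to be 1):

    ```
    att[t] = alpha_cum_start + t / (T - 1) * (alpha_cum_end - alpha_cum_start)
    at[t]  = att[t] / att[t - 1]
    ```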

    See section 4.1.
    """
    att = (
        np.arange(0, num_diffusion_timesteps) / (num_diffusion_timesteps - 1) * (alpha_cum_end - alpha_cum_start)
        + alpha_cum_start
    )
    att = np.concatenate(([1], att))
    at = att[1:] / att[:-1]
    att = np.concatenate((att[1:], [1]))
    return at, att


def gamma_schedules(num_diffusion_timesteps: int, gamma_cum_start=0.000009, gamma_cum_end=0.99999):
    """
    Cumulative and non-cumulative gamma schedules.
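
    The cumulative mask probability is interpolated linearly from `gamma_cum_start` to `gamma_cum_end`; the
    per-step values follow from the ratios of the survival (non-masking) probabilities. Schematically (with
    `T = num_diffusion_timesteps` and `ctt[-1]` taken to be 0):

    ```
    ctt[t] = gamma_cum_start + t / (T - 1) * (gamma_cum_end - gamma_cum_start)
    ct[t]  = 1 - (1 - ctt[t]) / (1 - ctt[t - 1])
    ```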

    See section 4.1.
    """
    ctt = (
        np.arange(0, num_diffusion_timesteps) / (num_diffusion_timesteps - 1) * (gamma_cum_end - gamma_cum_start)
        + gamma_cum_start
    )
    ctt = np.concatenate(([0], ctt))
    one_minus_ctt = 1 - ctt
    one_minus_ct = one_minus_ctt[1:] / one_minus_ctt[:-1]
    ct = 1 - one_minus_ct
    ctt = np.concatenate((ctt[1:], [0]))
    return ct, ctt


class VQDiffusionScheduler(SchedulerMixin, ConfigMixin):
    """
    A scheduler for vector quantized diffusion.

    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
    methods the library implements for all schedulers such as loading and saving.
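
    Example (a minimal sampling-loop sketch; `model` stands in for a transformer that predicts per-class log
    probabilities of shape `(batch size, num_vec_classes - 1, num latent pixels)` and, like the sizes below, is
    not part of this module):

    ```
    scheduler = VQDiffusionScheduler(num_vec_classes=4097)
    scheduler.set_timesteps(100)

    # start from fully masked latent pixels (the mask class is the last index)
    sample = torch.full((1, 1024), scheduler.mask_class, dtype=torch.long)

    for t in scheduler.timesteps:
        log_p_x_0 = model(sample, t)  # hypothetical denoising model
        sample = scheduler.step(log_p_x_0, t, sample).prev_sample
    ```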

    Args:
        num_vec_classes (`int`):
            The number of classes of the vector embeddings of the latent pixels. Includes the class for the masked
            latent pixel.
        num_train_timesteps (`int`, defaults to 100):
            The number of diffusion steps to train the model.
        alpha_cum_start (`float`, defaults to 0.99999):
            The starting cumulative alpha value.
        alpha_cum_end (`float`, defaults to 0.000009):
            The ending cumulative alpha value.
        gamma_cum_start (`float`, defaults to 0.000009):
            The starting cumulative gamma value.
        gamma_cum_end (`float`, defaults to 0.99999):
            The ending cumulative gamma value.
    """

    order = 1

    @register_to_config
    def __init__(
        self,
        num_vec_classes: int,
        num_train_timesteps: int = 100,
        alpha_cum_start: float = 0.99999,
        alpha_cum_end: float = 0.000009,
        gamma_cum_start: float = 0.000009,
        gamma_cum_end: float = 0.99999,
    ):
        self.num_embed = num_vec_classes

        # By convention, the index for the masked latent pixel class is the last class index.
        self.mask_class = self.num_embed - 1

        at, att = alpha_schedules(num_train_timesteps, alpha_cum_start=alpha_cum_start, alpha_cum_end=alpha_cum_end)
        ct, ctt = gamma_schedules(num_train_timesteps, gamma_cum_start=gamma_cum_start, gamma_cum_end=gamma_cum_end)

        num_non_mask_classes = self.num_embed - 1
        bt = (1 - at - ct) / num_non_mask_classes
        btt = (1 - att - ctt) / num_non_mask_classes

        at = torch.tensor(at.astype("float64"))
        bt = torch.tensor(bt.astype("float64"))
        ct = torch.tensor(ct.astype("float64"))
        log_at = torch.log(at)
        log_bt = torch.log(bt)
        log_ct = torch.log(ct)

        att = torch.tensor(att.astype("float64"))
        btt = torch.tensor(btt.astype("float64"))
        ctt = torch.tensor(ctt.astype("float64"))
        log_cumprod_at = torch.log(att)
        log_cumprod_bt = torch.log(btt)
        log_cumprod_ct = torch.log(ctt)

        self.log_at = log_at.float()
        self.log_bt = log_bt.float()
        self.log_ct = log_ct.float()
        self.log_cumprod_at = log_cumprod_at.float()
        self.log_cumprod_bt = log_cumprod_bt.float()
        self.log_cumprod_ct = log_cumprod_ct.float()

        # settable values
        self.num_inference_steps = None
        self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy())

    def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
        """
        Sets the discrete timesteps used for the diffusion chain (to be run before inference).

        Args:
            num_inference_steps (`int`):
                The number of diffusion steps used when generating samples with a pre-trained model.
            device (`str` or `torch.device`, *optional*):
                The device to which the timesteps and diffusion process parameters (alpha, beta, gamma) should
                be moved.
        """
        self.num_inference_steps = num_inference_steps
        timesteps = np.arange(0, self.num_inference_steps)[::-1].copy()
        self.timesteps = torch.from_numpy(timesteps).to(device)

        self.log_at = self.log_at.to(device)
        self.log_bt = self.log_bt.to(device)
        self.log_ct = self.log_ct.to(device)
        self.log_cumprod_at = self.log_cumprod_at.to(device)
        self.log_cumprod_bt = self.log_cumprod_bt.to(device)
        self.log_cumprod_ct = self.log_cumprod_ct.to(device)

    def step(
        self,
        model_output: torch.Tensor,
        timestep: torch.long,
        sample: torch.LongTensor,
        generator: Optional[torch.Generator] = None,
        return_dict: bool = True,
    ) -> Union[VQDiffusionSchedulerOutput, Tuple]:
        """
        Predict the sample from the previous timestep by the reverse transition distribution. See
        [`~VQDiffusionScheduler.q_posterior`] for more details about how the distribution is computed.
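
        A single call looks like the following sketch (`scheduler`, `t`, and the `log_p_x_0` prediction are
        assumed to come from the surrounding sampling loop):

        ```
        # log_p_x_0: (batch size, num classes - 1, num latent pixels); sample: (batch size, num latent pixels)
        sample = scheduler.step(log_p_x_0, t, sample).prev_sample
        ```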

        Args:
            model_output (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`):
                The log probabilities for the predicted classes of the initial latent pixels. Does not include a
                prediction for the masked class as the initial unnoised image cannot be masked.
            timestep (`torch.long`):
                The timestep that determines which transition matrices are used.
            sample (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
                The classes of each latent pixel at time `t`.
            generator (`torch.Generator`, or `None`):
                A random number generator for the noise applied to `p(x_{t-1} | x_t)` before it is sampled from.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or
                `tuple`.

        Returns:
            [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or `tuple`:
                If return_dict is `True`, [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] is
                returned, otherwise a tuple is returned where the first element is the sample tensor.
        """
        if timestep == 0:
            # At the final denoising step the model prediction for x_0 is used directly.
            log_p_x_t_min_1 = model_output
        else:
            log_p_x_t_min_1 = self.q_posterior(model_output, sample, timestep)

        log_p_x_t_min_1 = gumbel_noised(log_p_x_t_min_1, generator)

        x_t_min_1 = log_p_x_t_min_1.argmax(dim=1)

        if not return_dict:
            return (x_t_min_1,)

        return VQDiffusionSchedulerOutput(prev_sample=x_t_min_1)

    def q_posterior(self, log_p_x_0, x_t, t):
        """
        Calculates the log probabilities for the predicted classes of the image at timestep `t-1`:

        ```
        p(x_{t-1} | x_t) = sum( q(x_t | x_{t-1}) * q(x_{t-1} | x_0) * p(x_0) / q(x_t | x_0) )
        ```
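
        In the implementation the sum is evaluated entirely in log space; roughly (a sketch of the internal
        steps, not additional API):

        ```
        q = log_p_x_0 - log q(x_t | x_0)         # per-class ratio p(x_0) / q(x_t | x_0)
        norm = logsumexp(q, dim=1)               # normalizer, added back at the end
        q = apply_cumulative_transitions(q - norm, t - 1)
        log p(x_{t-1} | x_t) = q + log q(x_t | x_{t-1}) + norm
        ```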

        Args:
            log_p_x_0 (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`):
                The log probabilities for the predicted classes of the initial latent pixels. Does not include a
                prediction for the masked class as the initial unnoised image cannot be masked.
            x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
                The classes of each latent pixel at time `t`.
            t (`torch.Long`):
                The timestep that determines which transition matrix is used.

        Returns:
            `torch.Tensor` of shape `(batch size, num classes, num latent pixels)`:
                The log probabilities for the predicted classes of the image at timestep `t-1`.
        """
        log_onehot_x_t = index_to_log_onehot(x_t, self.num_embed)

        log_q_x_t_given_x_0 = self.log_Q_t_transitioning_to_known_class(
            t=t, x_t=x_t, log_onehot_x_t=log_onehot_x_t, cumulative=True
        )

        log_q_t_given_x_t_min_1 = self.log_Q_t_transitioning_to_known_class(
            t=t, x_t=x_t, log_onehot_x_t=log_onehot_x_t, cumulative=False
        )

        # p(x_0 | x_t) / q(x_t | x_0), per class of x_0, in log space.
        q = log_p_x_0 - log_q_x_t_given_x_0

        # Normalizer over the `num classes - 1` rows; subtracted here and added back at the end.
        q_log_sum_exp = torch.logsumexp(q, dim=1, keepdim=True)
        q = q - q_log_sum_exp

        # Multiply (in log space) by the cumulative transition matrix for timestep `t - 1`; this also appends
        # the row for the masked class.
        q = self.apply_cumulative_transitions(q, t - 1)

        # q(x_t | x_{t-1}) * q(x_{t-1} | x_0) * p(x_0) / q(x_t | x_0), with the normalizer restored.
        log_p_x_t_min_1 = q + log_q_t_given_x_t_min_1 + q_log_sum_exp

        return log_p_x_t_min_1

    def log_Q_t_transitioning_to_known_class(
        self, *, t: torch.int, x_t: torch.LongTensor, log_onehot_x_t: torch.Tensor, cumulative: bool
    ):
        """
        Calculates the log probabilities of the rows from the (cumulative or non-cumulative) transition matrix for each
        latent pixel in `x_t`.

        Args:
            t (`torch.Long`):
                The timestep that determines which transition matrix is used.
            x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
                The classes of each latent pixel at time `t`.
            log_onehot_x_t (`torch.Tensor` of shape `(batch size, num classes, num latent pixels)`):
                The log one-hot vectors of `x_t`.
            cumulative (`bool`):
                If cumulative is `False`, the single step transition matrix `t-1`->`t` is used. If cumulative is
                `True`, the cumulative transition matrix `0`->`t` is used.

        Returns:
            `torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`:
                Each _column_ of the returned matrix is a _row_ of log probabilities of the complete probability
                transition matrix.

                When non-cumulative, returns `self.num_embed - 1` rows because the initial latent pixel cannot be
                masked.

                Where:
                - `q_n` is the probability distribution for the forward process of the `n`th latent pixel.
                - C_0 is a class of a latent pixel embedding
                - C_k is the class of the masked latent pixel

                non-cumulative result (omitting logarithms):
                ```
                q_0(x_t | x_{t-1} = C_0) ... q_n(x_t | x_{t-1} = C_0)
                          .      .                     .
                          .               .            .
                          .                      .     .
                q_0(x_t | x_{t-1} = C_k) ... q_n(x_t | x_{t-1} = C_k)
                ```

                cumulative result (omitting logarithms):
                ```
                q_0_cumulative(x_t | x_0 = C_0)    ...  q_n_cumulative(x_t | x_0 = C_0)
                          .               .                          .
                          .                        .                 .
                          .                               .          .
                q_0_cumulative(x_t | x_0 = C_{k-1}) ... q_n_cumulative(x_t | x_0 = C_{k-1})
                ```
        """
        if cumulative:
            a = self.log_cumprod_at[t]
            b = self.log_cumprod_bt[t]
            c = self.log_cumprod_ct[t]
        else:
            a = self.log_at[t]
            b = self.log_bt[t]
            c = self.log_ct[t]

        if not cumulative:
            # The last row of the log one-hot vectors is already the log probability of transitioning from the
            # masked class (`0` if `x_t` is masked, a large negative value otherwise), so save it to re-append
            # after the other rows are filled in.
            log_onehot_x_t_transitioning_from_masked = log_onehot_x_t[:, -1, :].unsqueeze(1)

        # Drop the row for the masked class: the initial (unnoised) image can never be masked, so the cumulative
        # matrix has no such row, and for the non-cumulative case the saved row is re-appended below.
        log_onehot_x_t = log_onehot_x_t[:, :-1, :]

        # For a latent pixel that is not masked at time `t`, the column has value `logaddexp(a, b)` at the row
        # of its class and `b` at every other row.
        log_Q_t = (log_onehot_x_t + a).logaddexp(b)

        # The whole column of each masked latent pixel is `c`.
        mask_class_mask = x_t == self.mask_class
        mask_class_mask = mask_class_mask.unsqueeze(1).expand(-1, self.num_embed - 1, -1)
        log_Q_t[mask_class_mask] = c

        if not cumulative:
            log_Q_t = torch.cat((log_Q_t, log_onehot_x_t_transitioning_from_masked), dim=1)

        return log_Q_t

    def apply_cumulative_transitions(self, q, t):
        bsz = q.shape[0]
        a = self.log_cumprod_at[t]
        b = self.log_cumprod_bt[t]
        c = self.log_cumprod_ct[t]

        num_latent_pixels = q.shape[2]
        c = c.expand(bsz, 1, num_latent_pixels)

        q = (q + a).logaddexp(b)
        q = torch.cat((q, c), dim=1)

        return q