
    biy                        d dl mZmZmZ d dlZd dlmZ d dlmc mZ	 ddl
mZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZ ddlmZmZ  G d dej>                        Z  G d dej>                        Z!	 d%de"de#de#de#de"de"dee#   fdZ$ G d dej>                        Z% G d dej>                        Z& G d d ej>                        Z' G d! d"ej>                        Z( G d# d$eee      Z)y)&    )OptionalTupleUnionN   )ConfigMixinregister_to_config)FromOriginalModelMixin)apply_forward_hook   )get_activation)SanaMultiscaleLinearAttention)
ModelMixin)RMSNormget_normalization)	GLUMBConv   )DecoderOutputEncoderOutputc                   r     e Zd Z	 	 d
dededededdf
 fdZdej                  dej                  fd	Z xZ	S )ResBlockin_channelsout_channels	norm_typeact_fnreturnNc                    t         |           || _        |t        |      nt	        j
                         | _        t	        j                  ||ddd      | _        t	        j                  ||dddd      | _	        t        ||      | _        y )Nr   r   F)bias)super__init__r   r   nnIdentitynonlinearityConv2dconv1conv2r   norm)selfr   r   r   r   	__class__s        g/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_dc.pyr   zResBlock.__init__"   sq     	"6<6HN62bkkmYY{KAqA
YY{L!QN
%i>	    hidden_statesc                 "   |}| j                  |      }| j                  |      }| j                  |      }| j                  dk(  r6| j	                  |j                  dd            j                  dd      }||z   S | j	                  |      }||z   S )Nrms_normr   )r$   r"   r%   r   r&   movedim)r'   r+   residuals      r)   forwardzResBlock.forward2   s     

=1))-8

=1>>Z' IIm&;&;Ar&BCKKBPQRM x'' !IIm4Mx''r*   )
batch_normrelu6)
__name__
__module____qualname__intstrr   torchTensorr1   __classcell__r(   s   @r)   r   r   !   s[    
 &?? ? 	?
 ? 
? (U\\ (ell (r*   r   c                        e Zd Z	 	 	 	 ddedededeedf   deddf fd	Zd
ej                  dej                  fdZ
 xZS )EfficientViTBlockr   multattention_head_dimqkv_multiscales.r   r   Nc           	      v    t         |           t        ||||||d      | _        t	        ||d      | _        y )NT)r   r   r?   r@   r   kernel_sizesresidual_connectionr-   )r   r   r   )r   r   r   attnr   conv_out)r'   r   r?   r@   rA   r   r(   s         r)   r   zEfficientViTBlock.__init__B   sH     	1#$1( $
	 "#$ 
r*   xc                 J    | j                  |      }| j                  |      }|S )N)rE   rF   )r'   rG   s     r)   r1   zEfficientViTBlock.forward\   s"    IIaLMM!r*   )      ?       r2   )r4   r5   r6   r7   floatr   r8   r   r9   r:   r1   r;   r<   s   @r)   r>   r>   A   ss     "$+/%

 
  	

 sCx
 
 

4 %,, r*   r>   
block_typer   r   r@   r   r   qkv_mutliscalesc                 v    | dk(  rt        ||||      }|S | dk(  rt        ||||      }|S t        d| d      )Nr   r>   )r@   r   rA   zBlock with block_type=z is not supported.)r   r>   
ValueError)rN   r   r   r@   r   r   rO   blocks           r)   	get_blockrS   b   sb     ZlIvF L 
*	*!,>)et
 L 2zm3EFGGr*   c                   n     e Zd Zd
dededededdf
 fdZdej                  dej                  fd	Z xZ	S )DCDownBlock2dr   r   
downsampleshortcutr   Nc                 (   t         |           || _        d| _        |rdnd| _        || j                  dz  z  |z  | _        || _        | j                  dz  }|r||z  dk(  sJ ||z  }t        j                  ||d| j                  d      | _	        y )Nr   r   r   r   kernel_sizestridepadding)
r   r   rV   factorr[   
group_sizerW   r    r#   conv)r'   r   r   rV   rW   	out_ratior(   s         r)   r   zDCDownBlock2d.__init__z   s    $%a1%Q6,F KKN	)+q000'94LII;;
	r*   r+   c                 J   | j                  |      }| j                  r t        j                  || j                        }| j
                  rWt        j                  || j                        }|j                  dd| j                  f      }|j                  d      }||z   }|S |}|S Nr   r.   r   dim)	r_   rV   Fpixel_unshuffler]   rW   	unflattenr^   meanr'   r+   rG   ys       r)   r1   zDCDownBlock2d.forward   s    IIm$??!!!T[[1A==!!-=AADOO45A1AEM  Mr*   )FT)
r4   r5   r6   r7   boolr   r9   r:   r1   r;   r<   s   @r)   rU   rU   y   sH    
C 
s 
 
`d 
pt 
,U\\ ell r*   rU   c                   x     e Zd Z	 	 	 ddedededededdf fdZd	ej                  dej                  fd
Z	 xZ
S )DCUpBlock2dr   r   interpolaterW   interpolation_moder   Nc                     t         |           || _        || _        || _        d| _        || j
                  dz  z  |z  | _        | j
                  dz  }|s||z  }t        j                  ||ddd      | _	        y )Nr   r   r   )
r   r   rn   ro   rW   r]   repeatsr    r#   r_   )r'   r   r   rn   rW   ro   r`   r(   s          r)   r   zDCUpBlock2d.__init__   sy     	&"4 #dkk1n4CKKN	')3LIIk<AqA	r*   r+   c                    | j                   r>t        j                   || j                  | j                        }| j	                  |      }n1| j	                  |      }t        j
                  || j                        }| j                  r_|j                  | j                  d|j                  d   | j                  z        }t        j
                  || j                        }||z   }|S |}|S )N)scale_factormoder   rd   output_size)
rn   re   r]   ro   r_   pixel_shufflerW   repeat_interleaverq   shaperi   s       r)   r1   zDCUpBlock2d.forward   s    m$++DLcLcdA		!A		-(A4;;/A==//!Q^QdQdefQgjnjvjvQv/wA4;;/AEM  Mr*   )FTnearest)r4   r5   r6   r7   rk   r8   r   r9   r:   r1   r;   r<   s   @r)   rm   rm      so    
 ""+BB B 	B
 B  B 
B.U\\ ell r*   rm   c                        e Zd Z	 	 	 	 	 	 	 ddedededeeee   f   dee   dee   deeedf   df   d	ed
ef fdZde	j                  de	j                  fdZ xZS )Encoderr   latent_channelsr@   rN   block_out_channelslayers_per_blockrA   .downsample_block_typeout_shortcutc
                 .   t         |           t        |      }
t        |t              r|f|
z  }|d   dkD  r0t        j                  ||d   dkD  r|d   n|d   ddd      | _        n't        ||d   dkD  r|d   n|d   |dk(  d      | _        g }t        t        ||            D ]  \  }\  }}g }t        |      D ]+  }t        ||   |||dd	||   
      }|j                  |       - ||
dz
  k  r.|dkD  r)t        |||dz      |dk(  d      }|j                  |       |j                  t        j                  |         t        j                  |      | _        t        j                  |d   |ddd      | _        |	| _        |	r|d   |z  | _        y y )Nr   r   r   rY   rf   F)r   r   rV   rW   r-   silur@   r   r   rO   Tr.   )r   r   len
isinstancer8   r    r#   conv_inrU   	enumerateziprangerS   append
Sequential
ModuleListdown_blocksrF   r   out_shortcut_average_group_size)r'   r   r}   r@   rN   r~   r   rA   r   r   
num_blocksr   iout_channel
num_layersdown_block_list_rR   downsample_blockr(   s                      r)   r   zEncoder.__init__   s    	+,
j#&$3JA"99)9!)<q)@"1%FXYZF[DL )'6Fq6IA6M/2SefgSh04EE	DL ,5c:LN^6_,` 	@(A(Z O:& 
.!qM'9(!$3A$6  &&u-
. :>!j1n#0 +!3AE!:48II!	$   &&'78r}}o>?1	@4 ==5		"4R"8/1aQRS(3Eb3I_3\D0 r*   r+   r   c                     | j                  |      }| j                  D ]
  } ||      } | j                  rF|j                  dd| j                  f      }|j                  d      }| j                  |      |z   }|S | j                  |      }|S rb   )r   r   r   rg   r   rh   rF   )r'   r+   
down_blockrG   s       r)   r1   zEncoder.forward  s    ]3** 	6J&}5M	6 ''B0T0T+UVA1A MM-81<M  !MM-8Mr*   )rJ   r            r      r   r   r   r   r   r   r    r   r   rK   rK   rK   rf   Tr4   r5   r6   r7   r   r8   r   rk   r   r9   r:   r1   r;   r<   s   @r)   r|   r|      s    
 #%-7)I'97U%6!D]D] D]  	D]
 #uSz/*D] "#JD]  *D] uS#X34D]  #D] D]LU\\ ell r*   r|   c                        e Zd Z	 	 	 	 	 	 	 	 	 	 ddedededeeee   f   dee   dee   deeedf   df   d	eeee   f   d
eeee   f   dededef fdZde	j                  de	j                  fdZ xZS )Decoderr   r}   r@   rN   r~   r   rA   .r   r   upsample_block_typein_shortcutconv_act_fnc                    t         |           t        |      }t        |t              r|f|z  }t        |t              r|f|z  }t        |	t              r|	f|z  }	t        j                  ||d   ddd      | _        || _        |r|d   |z  | _	        g }t        t        t        t        ||                        D ]  \  }\  }}g }||dz
  k  r.|dkD  r)t        ||dz      ||
dk(  d      }|j                  |       t!        |      D ]1  }t#        ||   |||||   |	|   ||         }|j                  |       3 |j%                  dt        j&                  |         t        j(                  |      | _        |d   dkD  r|d   n|d   }t-        |d	dd
      | _        t1        |      | _        d | _        |d   dkD  rt        j                  ||ddd      | _        y t        |||
dk(  d      | _        y )Nr.   r   r   r   rn   T)rn   rW   r   gh㈵>)elementwise_affiner   F)r   r   r   r   r8   r    r#   r   r   in_shortcut_repeatsreversedlistr   r   rm   r   r   rS   insertr   r   	up_blocksr   norm_outr   conv_actrF   )r'   r   r}   r@   rN   r~   r   rA   r   r   r   r   r   r   r   r   r   r   up_block_listupsample_blockr   rR   channelsr(   s                          r)   r   zDecoder.__init__!  s!    	+,
j#&$3Ji%"z1Ifc"Y+Fyy2DR2H!QPQR&'9"'='PD$	,4T)CHZ\lDm:n5o,p 	?(A(ZM:>!j1n!,&q1u- 3} D!	" $$^4:& 
,!qM'9'l!!9$3A$6 $$U+
, Q} =>1	?4 y1,<Q,?!,C%a(I[\]I^$4dS&{3A"IIhQ1EDM'+3F-3WbgDMr*   r+   r   c                    | j                   rM|j                  | j                  d|j                  d   | j                  z        }| j	                  |      |z   }n| j	                  |      }t        | j                        D ]
  } ||      } | j                  |j                  dd            j                  dd      }| j                  |      }| j                  |      }|S )Nr   ru   r.   )r   rx   r   ry   r   r   r   r   r/   r   rF   )r'   r+   rG   up_blocks       r)   r1   zDecoder.forwardk  s    //((a]=P=PQR=SVZVnVn=n 0 A !LL7!;M LL7M 0 	4H$]3M	4 m&;&;Ar&BCKKBPQRm4m4r*   )
rJ   r   r   r   r   r-   r   rw   Trelur   r<   s   @r)   r   r      s   
 #%-7)I'97U,6)/#2 !HH H  	H
 #uSz/*H "#JH  *H uS#X34H eCj)H c5:o&H !H H HTU\\ ell r*   r   c            *           e Zd ZdZdZe	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d2dedededeee	e   f   deee	e   f   de	ed	f   d
e	ed	f   de	e   de	e   de	e	ed	f   d	f   de	e	ed	f   d	f   dededeee	e   f   deee	e   f   de
de
dededdf( fd       Z	 	 	 	 d3dee   dee   dee   dee   ddf
dZd4dZd4d Zd4d!Zd"ej&                  dej&                  fd#Zed5d"ej&                  d$e
deee	ej&                     f   fd%       Zd&ej&                  dej&                  fd'Zed5d&ej&                  d$e
deee	ej&                     f   fd(       Zd)ej&                  d*ej&                  d+edej&                  fd,Zd)ej&                  d*ej&                  d+edej&                  fd-Zd5d"ej&                  d$e
dej&                  fd.Zd5d&ej&                  d$e
deeej&                  f   fd/Zd5d0ej&                  d$e
dej&                  fd1Z xZ S )6AutoencoderDCa  
    An Autoencoder model introduced in [DCAE](https://huggingface.co/papers/2410.10733) and used in
    [SANA](https://huggingface.co/papers/2410.10629).

    This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented
    for all models (such as downloading or saving).

    Args:
        in_channels (`int`, defaults to `3`):
            The number of input channels in samples.
        latent_channels (`int`, defaults to `32`):
            The number of channels in the latent space representation.
        encoder_block_types (`Union[str, Tuple[str]]`, defaults to `"ResBlock"`):
            The type(s) of block to use in the encoder.
        decoder_block_types (`Union[str, Tuple[str]]`, defaults to `"ResBlock"`):
            The type(s) of block to use in the decoder.
        encoder_block_out_channels (`Tuple[int, ...]`, defaults to `(128, 256, 512, 512, 1024, 1024)`):
            The number of output channels for each block in the encoder.
        decoder_block_out_channels (`Tuple[int, ...]`, defaults to `(128, 256, 512, 512, 1024, 1024)`):
            The number of output channels for each block in the decoder.
        encoder_layers_per_block (`Tuple[int]`, defaults to `(2, 2, 2, 3, 3, 3)`):
            The number of layers per block in the encoder.
        decoder_layers_per_block (`Tuple[int]`, defaults to `(3, 3, 3, 3, 3, 3)`):
            The number of layers per block in the decoder.
        encoder_qkv_multiscales (`Tuple[Tuple[int, ...], ...]`, defaults to `((), (), (), (5,), (5,), (5,))`):
            Multi-scale configurations for the encoder's QKV (query-key-value) transformations.
        decoder_qkv_multiscales (`Tuple[Tuple[int, ...], ...]`, defaults to `((), (), (), (5,), (5,), (5,))`):
            Multi-scale configurations for the decoder's QKV (query-key-value) transformations.
        upsample_block_type (`str`, defaults to `"pixel_shuffle"`):
            The type of block to use for upsampling in the decoder.
        downsample_block_type (`str`, defaults to `"pixel_unshuffle"`):
            The type of block to use for downsampling in the encoder.
        decoder_norm_types (`Union[str, Tuple[str]]`, defaults to `"rms_norm"`):
            The normalization type(s) to use in the decoder.
        decoder_act_fns (`Union[str, Tuple[str]]`, defaults to `"silu"`):
            The activation function(s) to use in the decoder.
        encoder_out_shortcut  (`bool`, defaults to `True`):
            Whether to use shortcut at the end of the encoder.
        decoder_in_shortcut (`bool`, defaults to `True`):
            Whether to use shortcut at the beginning of the decoder.
        decoder_conv_act_fn (`str`, defaults to `"relu"`):
            The activation function to use at the end of the decoder.
        scaling_factor (`float`, defaults to `1.0`):
            The multiplicative inverse of the root mean square of the latent features. This is used to scale the latent
            space to have unit variance when training the diffusion model. The latents are scaled with the formula `z =
            z * scaling_factor` before being passed to the diffusion model. When decoding, the latents are scaled back
            to the original scale with the formula: `z = 1 / scaling_factor * z`.
    Fr   r}   r@   encoder_block_typesdecoder_block_typesencoder_block_out_channels.decoder_block_out_channelsencoder_layers_per_blockdecoder_layers_per_blockencoder_qkv_multiscalesdecoder_qkv_multiscalesr   r   decoder_norm_typesdecoder_act_fnsencoder_out_shortcutdecoder_in_shortcutdecoder_conv_act_fnscaling_factorr   Nc                    t         |           t        |||||||
||	      | _        t	        ||||||	||||||      | _        dt        |      dz
  z  | _        d| _        d| _	        d| _
        d| _        d| _        d| _        d| _        | j                  | j                  z  | _        | j                  | j                  z  | _        y )N)	r   r}   r@   rN   r~   r   rA   r   r   )r   r}   r@   rN   r~   r   rA   r   r   r   r   r   r   r   Fr   i  )r   r   r|   encoderr   decoderr   spatial_compression_ratiotemporal_compression_ratiouse_slicing
use_tilingtile_sample_min_heighttile_sample_min_widthtile_sample_stride_heighttile_sample_stride_widthtile_latent_min_heighttile_latent_min_width)r'   r   r}   r@   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(   s                       r)   r   zAutoencoderDC.__init__  s    . 	#+1*953"7-

 #+1*953(" 3++
 *+s3M/NQR/R)S&*+' !
   '*#%(" *-&(+%&*&A&ATEcEc&c#%)%?%?4CaCa%a"r*   r   r   r   r   c                 2   d| _         |xs | j                  | _        |xs | j                  | _        |xs | j                  | _        |xs | j                  | _        | j                  | j
                  z  | _        | j                  | j
                  z  | _        y)aV  
        Enable tiled AE decoding. When this option is enabled, the AE will split the input tensor into tiles to compute
        decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.

        Args:
            tile_sample_min_height (`int`, *optional*):
                The minimum height required for a sample to be separated into tiles across the height dimension.
            tile_sample_min_width (`int`, *optional*):
                The minimum width required for a sample to be separated into tiles across the width dimension.
            tile_sample_stride_height (`int`, *optional*):
                The minimum amount of overlap between two consecutive vertical tiles. This is to ensure that there are
                no tiling artifacts produced across the height dimension.
            tile_sample_stride_width (`int`, *optional*):
                The stride between two consecutive horizontal tiles. This is to ensure that there are no tiling
                artifacts produced across the width dimension.
        TN)r   r   r   r   r   r   r   r   )r'   r   r   r   r   s        r)   enable_tilingzAutoencoderDC.enable_tiling  s    0 &<&[@[@[#%:%Xd>X>X")B)ddFdFd&(@(aDDaDa%&*&A&ATEcEc&c#%)%?%?4CaCa%a"r*   c                     d| _         y)z
        Disable tiled AE decoding. If `enable_tiling` was previously enabled, this method will go back to computing
        decoding in one step.
        FN)r   r'   s    r)   disable_tilingzAutoencoderDC.disable_tiling  s    
  r*   c                     d| _         y)z
        Enable sliced AE decoding. When this option is enabled, the AE will split the input tensor in slices to compute
        decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        TNr   r   s    r)   enable_slicingzAutoencoderDC.enable_slicing"  s    
  r*   c                     d| _         y)z
        Disable sliced AE decoding. If `enable_slicing` was previously enabled, this method will go back to computing
        decoding in one step.
        FNr   r   s    r)   disable_slicingzAutoencoderDC.disable_slicing)  s    
 !r*   rG   c                     |j                   \  }}}}| j                  r4|| j                  kD  s|| j                  kD  r| j	                  |d      d   S | j                  |      }|S NFreturn_dictr   )ry   r   r   r   tiled_encoder   )r'   rG   
batch_sizenum_channelsheightwidthencodeds          r)   _encodezAutoencoderDC._encode0  b    23''/
L&%??(B(B BftOjOjFj$$QE$:1==,,q/r*   r   c                    | j                   rU|j                  d   dkD  rC|j                  d      D cg c]  }| j                  |       }}t	        j
                  |      }n| j                  |      }|s|fS t        |      S c c}w )a  
        Encode a batch of images into latents.

        Args:
            x (`torch.Tensor`): Input batch of images.
            return_dict (`bool`, defaults to `True`):
                Whether to return a [`~models.vae.EncoderOutput`] instead of a plain tuple.

        Returns:
                The latent representations of the encoded videos. If `return_dict` is True, a
                [`~models.vae.EncoderOutput`] is returned, otherwise a plain `tuple` is returned.
        r   r   latent)r   ry   splitr   r9   catr   )r'   rG   r   x_sliceencoded_slicesr   s         r)   encodezAutoencoderDC.encode:  sv     
QCD771:Ndll73NNNii/Gll1oG:G,, Os   Bzc                     |j                   \  }}}}| j                  r4|| j                  kD  s|| j                  kD  r| j	                  |d      d   S | j                  |      }|S r   )ry   r   r   r   tiled_decoder   )r'   r   r   r   r   r   decodeds          r)   _decodezAutoencoderDC._decodeR  r   r*   c                 *   | j                   ra|j                  d      dkD  rM|j                  d      D cg c]  }| j                  |      j                   }}t        j                  |      }n| j                  |      }|s|fS t        |      S c c}w )a  
        Decode a batch of images.

        Args:
            z (`torch.Tensor`): Input batch of latent vectors.
            return_dict (`bool`, defaults to `True`):
                Whether to return a [`~models.vae.DecoderOutput`] instead of a plain tuple.

        Returns:
            [`~models.vae.DecoderOutput`] or `tuple`:
                If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is
                returned.
        r   r   sample)r   sizer   r   r   r9   r   r   )r'   r   r   z_slicedecoded_slicesr   s         r)   decodezAutoencoderDC.decode\  s}     q	AJK''RS*Uwdll73::UNUii/Gll1oG:G,, Vs   "Babblend_extentc                     t        |j                  d   |j                  d   |      }t        |      D ]A  }|d d d d | |z   d d f   d||z  z
  z  |d d d d |d d f   ||z  z  z   |d d d d |d d f<   C |S )Nr   r   minry   r   )r'   r   r   r   rj   s        r)   blend_vzAutoencoderDC.blend_vu  s    1771:qwwqz<@|$ 	xAa\MA$5q89Q\AQ=QRUVWXZ[]^`aWaUbfgjvfvUwwAaAqjM	xr*   c                     t        |j                  d   |j                  d   |      }t        |      D ]A  }|d d d d d d | |z   f   d||z  z
  z  |d d d d d d |f   ||z  z  z   |d d d d d d |f<   C |S )Nr   r   r  )r'   r   r   r   rG   s        r)   blend_hzAutoencoderDC.blend_h{  s    1771:qwwqz<@|$ 	xAaA}q'889Q\AQ=QRUVWXZ[]^`aWaUbfgjvfvUwwAaAqjM	xr*   c           
         |j                   \  }}}}|| j                  z  }|| j                  z  }| j                  | j                  z  }	| j                  | j                  z  }
| j                  | j                  z  }| j
                  | j                  z  }|	|z
  }|
|z
  }g }t        d|j                   d   | j                        D ]6  }g }t        d|j                   d   | j
                        D ]  }|d d d d ||| j                  z   ||| j                  z   f   }|j                   d   | j                  z  dk7  s|j                   d   | j                  z  dk7  rl| j                  |j                   d   z
  | j                  z  }| j                  |j                   d   z
  | j                  z  }t        j                  |d|d|f      }| j                  |      }|j                  |        |j                  |       9 g }t        |      D ]  \  }}g }t        |      D ]d  \  }}|dkD  r| j                  ||dz
     |   ||      }|dkD  r| j                  ||dz
     ||      }|j                  |d d d d d |d |f          f |j                  t        j                  |d              t        j                  |d      d d d d d |d |f   }|s|fS t!        |      S )Nr   r   r   r   rc   r   )ry   r   r   r   r   r   r   re   padr   r   r   r  r  r9   r   r   )r'   rG   r   r   r   r   r   latent_heightlatent_widthr   r   tile_latent_stride_heighttile_latent_stride_widthblend_heightblend_widthrowsr   rowjtilepad_hpad_wresult_rows
result_rowr   s                            r)   r   zAutoencoderDC.tiled_encode  s   23''/
L&%$"@"@@ > >>!%!<!<@^@^!^ $ : :d>\>\ \$($B$BdFdFd$d!#'#@#@DDbDb#b -0II+.FF q!''!*d&D&DE 	AC1aggaj$*G*GH 
!Aq1t'B'B#BBADLfLfHfDffgJJqMD$B$BBaGzz!}t'E'EEJ!;;djjmKtOmOmmE!;;djjmKtOmOmmE555!U';<D||D)

4 
! KK	 o 
	=FAsJ$S> e4 q5<<QUAlKDq5<<AE
D+FD!!$q!-G.G-GIbJbIb'b"cde uyy;<
	= ))KQ/1n}nm|m0ST:G,,r*   c           
      
   |j                   \  }}}}| j                  | j                  z  }| j                  | j                  z  }| j                  | j                  z  }	| j
                  | j                  z  }
| j                  | j                  z
  }| j                  | j
                  z
  }g }t        d||	      D ]`  }g }t        d||
      D ];  }|d d d d |||z   |||z   f   }| j                  |      }|j                  |       = |j                  |       b g }t        |      D ]  \  }}g }t        |      D ]x  \  }}|dkD  r| j                  ||dz
     |   ||      }|dkD  r| j                  ||dz
     ||      }|j                  |d d d d d | j                  d | j
                  f          z |j                  t        j                  |d              t        j                  |d      }|s|fS t        |      S )Nr   r   r   rc   r   r   )ry   r   r   r   r   r   r   r   r   r   r  r  r9   r   r   )r'   r   r   r   r   r   r   r   r   r
  r  r  r  r  r   r  r  r  r   r  r  s                        r)   r   zAutoencoderDC.tiled_decode  s'   23''/
L&%!%!<!<@^@^!^ $ : :d>\>\ \$($B$BdFdFd$d!#'#@#@DDbDb#b 22T5S5SS0043P3PP q&";< 	AC1e%=> $Aq1'=#==q1G\C\?\\],,t,

7#$ KK	 o 
	=FAsJ$S> q4 q5<<QUAlKDq5<<AE
D+FD!!$q!-Mt/M/M-MOnQUQnQnOn'n"opq uyy;<
	= ))KQ/:G,,r*   r   c                 |    | j                  |d      d   }| j                  |d      d   }|s|fS t        |      S )NFr   r   r   )r   r   r   )r'   r   r   r   r   s        r)   r1   zAutoencoderDC.forward  sF    ++f%+8;++g5+9!<:G,,r*   )r   rJ   rJ   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   r   r   rw   rf   r-   r   TTr   rI   )NNNN)r   N)T)!r4   r5   r6   __doc__ _supports_gradient_checkpointingr   r7   r   r8   r   rk   rM   r   r   r   r   r   r   r9   r:   r   r
   r   r   r   r   r   r  r  r   r   r1   r;   r<   s   @r)   r   r   }  s   /b (-$ !"$6@6@6V6V/A/A?]?]#2%65?28%)$(#) #)GbGb Gb  	Gb
 #3c
?3Gb #3c
?3Gb %*#s(OGb %*#s(OGb #(*Gb #(*Gb "'uS#X';!<Gb "'uS#X';!<Gb !Gb  #Gb "#uSz/2Gb  sE#J/!Gb" ##Gb$ "%Gb& !'Gb( )Gb* 
+Gb GbV 15/35948b (b  (}b $,E?	b
 #+5/b 
b@  ! %,,  - -4 -5X]^c^j^jXkIkCl - -. %,,  - -4 -5X]^c^j^jXkIkCl - -0 %,, c ell  %,, c ell .-ell .- .- .-`'-ell '- '-}^c^j^jOjIk '-R-ell - - -r*   r   )r   )*typingr   r   r   r9   torch.nnr    torch.nn.functional
functionalre   configuration_utilsr   r   loadersr	   utils.accelerate_utilsr
   activationsr   attention_processorr   modeling_utilsr   normalizationr   r   transformers.sana_transformerr   vaer   r   Moduler   r>   r8   r7   rS   rU   rm   r|   r   r   r   r*   r)   <module>r(     s    * )     B - 8 ( ? ' 6 5 -(ryy (@		 P #%  	
   3Z.$BII $N'")) 'TSbii SlZbii Zzb-J-C b-r*   