
    bi                     @   d dl mZ d dlmZmZ d dlZd dlZd dlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZ e G d de             Ze G d de             Z G d de	j2                        Z G d de	j2                        Z G d de	j2                        Z G d de	j2                        Z G d de	j2                        Z G d de	j2                        Z G d de       Z! G d de       Z" G d d e	j2                        Z# G d! d"e	j2                        Z$y)#    )	dataclass)OptionalTupleN   )
BaseOutput)randn_tensor   )get_activation)SpatialNorm)AutoencoderTinyBlockUNetMidBlock2Dget_down_blockget_up_blockc                   0    e Zd ZU dZej
                  ed<   y)EncoderOutputz
    Output of encoding method.

    Args:
        latent (`torch.Tensor` of shape `(batch_size, num_channels, latent_height, latent_width)`):
            The encoded latent.
    latentN)__name__
__module____qualname____doc__torchTensor__annotations__     \/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/autoencoders/vae.pyr   r   !   s     LLr   r   c                   X    e Zd ZU dZej
                  ed<   dZeej                     ed<   y)DecoderOutputz
    Output of decoding method.

    Args:
        sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)`):
            The decoded output sample from the last layer of the model.
    sampleNcommit_loss)
r   r   r   r   r   r   r   r    r   FloatTensorr   r   r   r   r   .   s(     LL/3K%++,3r   r   c                        e Zd ZdZ	 	 	 	 	 	 	 	 	 ddededeedf   deedf   deded	ed
ef fdZde	j                  de	j                  fdZ xZS )Encodera  
    The `Encoder` layer of a variational autoencoder that encodes its input into a latent representation.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
        down_block_types (`Tuple[str, ...]`, *optional*, defaults to `("DownEncoderBlock2D",)`):
            The types of down blocks to use. See `~diffusers.models.unet_2d_blocks.get_down_block` for available
            options.
        block_out_channels (`Tuple[int, ...]`, *optional*, defaults to `(64,)`):
            The number of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        act_fn (`str`, *optional*, defaults to `"silu"`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
        double_z (`bool`, *optional*, defaults to `True`):
            Whether to double the number of output channels for the last block.
    in_channelsout_channelsdown_block_types.block_out_channelslayers_per_blocknorm_num_groupsact_fndouble_zc
                    t         |           || _        t        j                  ||d   ddd      | _        t        j                  g       | _        |d   }
t        |      D ]Y  \  }}|
}||   }
|t        |      dz
  k(  }t        || j                  ||
| dd|||
d       }| j                  j                  |       [ t        |d   d|dd|d   |d |			      | _        t        j                  |d   |d
      | _        t        j                          | _        |rd|z  n|}t        j                  |d   |dd      | _        d| _        y )Nr   r      kernel_sizestridepaddingư>)

num_layersr$   r%   add_downsample
resnet_epsdownsample_paddingresnet_act_fnresnet_groupsattention_head_dimtemb_channelsdefault	r$   r5   r7   output_scale_factorresnet_time_scale_shiftr9   r8   r:   add_attentionnum_channels
num_groupsepsr	   r1   F)super__init__r(   nnConv2dconv_in
ModuleListdown_blocks	enumeratelenr   appendr   	mid_block	GroupNormconv_norm_outSiLUconv_actconv_outgradient_checkpointing)selfr$   r%   r&   r'   r(   r)   r*   r+   mid_block_add_attentionoutput_channelidown_block_typeinput_channelis_final_block
down_blockconv_out_channels	__class__s                    r   rG   zEncoder.__init__T   sw    	 0yyq!
 ==, ,A."+,<"= 	0A*M/2N#&8"9A"==N'00)+#11#$$-#1"J ##J/%	0* (*2.  !$-1"5)1

  \\7I"7MZiost	08A,l		"4R"8:KQXYZ&+#r   r   returnc                    | j                  |      }t        j                         rL| j                  r@| j                  D ]  }| j                  ||      } | j                  | j                  |      }n*| j                  D ]
  } ||      } | j                  |      }| j                  |      }| j                  |      }| j                  |      }|S )z*The forward method of the `Encoder` class.)
rJ   r   is_grad_enabledrV   rL   _gradient_checkpointing_funcrP   rR   rT   rU   )rW   r   r^   s      r   forwardzEncoder.forward   s     f%  "t'B'B".. O
:::vNO 66t~~vNF #.. ,
#F+, ^^F+F ##F+v&v&r   )	r   r   )DownEncoderBlock2D@   r	       siluTT)r   r   r   r   intr   strboolrG   r   r   re   __classcell__r`   s   @r   r#   r#   <   s    2 ,C.3 !! $C,C, C,  S/	C,
 "#s(OC, C, C, C, C,Jell u|| r   r#   c                        e Zd ZdZ	 	 	 	 	 	 	 	 	 ddededeedf   deedf   deded	ed
ef fdZ	 ddej                  de
ej                     dej                  fdZ xZS )Decodera  
    The `Decoder` layer of a variational autoencoder that decodes its latent representation into an output sample.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
        up_block_types (`Tuple[str, ...]`, *optional*, defaults to `("UpDecoderBlock2D",)`):
            The types of up blocks to use. See `~diffusers.models.unet_2d_blocks.get_up_block` for available options.
        block_out_channels (`Tuple[int, ...]`, *optional*, defaults to `(64,)`):
            The number of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        act_fn (`str`, *optional*, defaults to `"silu"`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
        norm_type (`str`, *optional*, defaults to `"group"`):
            The normalization type to use. Can be either `"group"` or `"spatial"`.
    r$   r%   up_block_types.r'   r(   r)   r*   	norm_typec
                    t         |           || _        t        j                  ||d   ddd      | _        t        j                  g       | _        |dk(  r|nd }
t        |d   d|d|dk(  rdn||d   ||
|			      | _	        t        t        |            }|d
   }t        |      D ]_  \  }}|}||   }|t        |      dz
  k(  }t        || j                  dz   |||| d||||
|      }| j                  j                  |       |}a |dk(  rt!        |d
   |
      | _        n t        j$                  |d
   |d      | _        t        j&                         | _        t        j                  |d
   |dd      | _        d| _        y )Nr;   r   r-   r.   spatialr2   groupr<   r=   r   r3   r$   r%   prev_output_channeladd_upsampler5   r7   r8   r9   r:   r?   rA   rE   F)rF   rG   r(   rH   rI   rJ   rK   	up_blocksr   rP   listreversedrM   rN   r   rO   r   rR   rQ   rS   rT   rU   rV   )rW   r$   r%   rr   r'   r(   r)   r*   rs   rX   r:   reversed_block_out_channelsrY   rZ   up_block_typerx   r]   up_blockr`   s                     r   rG   zDecoder.__init__   s    	 0yyr"
 r*'0I'=4 (*2.  !1:g1EI91"5)'1

 '+84F+G&H#4Q7 ). 9 	1A}"08;N#&8"9A"==N#0014/+$7!//$-#1+(1H NN!!(+"0+	10 	!!,-?-BM!RD!#;Ma;P]lrv!wD			"4Q"7qRST&+#r   r   latent_embedsra   c                    | j                  |      }t        t        | j                  j	                                     j
                  }t        j                         r_| j                  rS| j                  | j                  ||      }|j                  |      }| j                  D ]  }| j                  |||      } n=| j                  ||      }|j                  |      }| j                  D ]  } |||      } || j                  |      }n| j                  ||      }| j                  |      }| j                  |      }|S )z*The forward method of the `Decoder` class.)rJ   nextiterrz   
parametersdtyper   rc   rV   rd   rP   torR   rT   rU   )rW   r   r   upscale_dtyper   s        r   re   zDecoder.forward  s%    f%T$..";";"=>?EE  "t'B'B66t~~v}]FYY}-F !NN \::8V][\ ^^FM:FYY}-F !NN 9!&-89  ''/F''>Fv&v&r   )	r   r   UpDecoderBlock2Drg   r	   ri   rj   rv   TNr   r   r   r   rk   r   rl   rG   r   r   r   re   rn   ro   s   @r   rq   rq      s    0 *?.3 !!  $J,J, J, c3h	J,
 "#s(OJ, J, J, J, J,^ 15##  -# 
	#r   rq   c                   h     e Zd ZdZdededdf fdZdej                  dej                  fdZ xZ	S )	UpSamplea&  
    The `UpSample` layer of a variational autoencoder that upsamples its input.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
    r$   r%   ra   Nc                 |    t         |           || _        || _        t	        j
                  ||ddd      | _        y )N   r	   r-   r.   )rF   rG   r$   r%   rH   ConvTranspose2ddeconv)rW   r$   r%   r`   s      r   rG   zUpSample.__init__I  s=    
 	&(((lPQZ[efgr   xc                 R    t        j                  |      }| j                  |      }|S )z+The forward method of the `UpSample` class.)r   relur   rW   r   s     r   re   zUpSample.forwardS  s!    JJqMKKNr   
r   r   r   r   rk   rG   r   r   re   rn   ro   s   @r   r   r   >  sH    hh h 
	h %,, r   r   c                   z     e Zd ZdZ	 	 	 ddededededdf
 fdZdd	ej                  dej                  fd
Z xZ	S )MaskConditionEncoderz)
    used in AsymmetricAutoencoderKL
    in_chout_chres_chr0   ra   Nc           
      4   t         |           g }|dkD  r6|dz  }|dz  }||kD  r|}|dk(  r|}|j                  ||f       |dz  }|dkD  r6g }|D ]  \  }}	|j                  |	        |j                  |d   d          g }
|}t        t	        |            D ]f  }||   }|dk(  s|dk(  r*|
j                  t        j                  ||ddd             n)|
j                  t        j                  ||ddd             |}h t        j                  |
 | _        y )Nr-   r	   r;   r   r   r.   r   )	rF   rG   rO   rangerN   rH   rI   
Sequentiallayers)rW   r   r   r   r0   channelsin_ch_r%   _in_ch_out_chr   lout_ch_r`   s                r   rG   zMaskConditionEncoder.__init___  s3    	qjq[FaZF{OOVV,-aKF qj ' 	)OFG(	)HRLO,s<() 	A"1oGAvabiiQqZ[\]biiQqZ[\]F	 mmV,r   r   c                     i }t        t        | j                              D ]O  }| j                  |   } ||      }||t        t	        |j
                              <   t        j                  |      }Q |S )z7The forward method of the `MaskConditionEncoder` class.)r   rN   r   rl   tupleshaper   r   )rW   r   maskoutr   layers         r   re   zMaskConditionEncoder.forward  se    s4;;'( 	AKKNEaA'(CE!''N#$

1A		
 
r   )   i      r   r   ro   s   @r   r   r   Z  sd     #-#- #- 	#-
 #- 
#-J U\\ r   r   c                       e Zd ZdZ	 	 	 	 	 	 	 	 ddededeedf   deedf   deded	ed
ef fdZ	 	 	 ddej                  de
ej                     de
ej                     de
ej                     dej                  f
dZ xZS )MaskConditionDecodera  The `MaskConditionDecoder` should be used in combination with [`AsymmetricAutoencoderKL`] to enhance the model's
    decoder with a conditioner on the mask and masked image.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
        up_block_types (`Tuple[str, ...]`, *optional*, defaults to `("UpDecoderBlock2D",)`):
            The types of up blocks to use. See `~diffusers.models.unet_2d_blocks.get_up_block` for available options.
        block_out_channels (`Tuple[int, ...]`, *optional*, defaults to `(64,)`):
            The number of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        act_fn (`str`, *optional*, defaults to `"silu"`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
        norm_type (`str`, *optional*, defaults to `"group"`):
            The normalization type to use. Can be either `"group"` or `"spatial"`.
    r$   r%   rr   .r'   r(   r)   r*   rs   c	                 8   t         |           || _        t        j                  ||d   ddd      | _        t        j                  g       | _        |dk(  r|nd }	t        |d   d|d|dk(  rdn||d   ||		      | _	        t        t        |            }
|
d
   }t        |      D ]_  \  }}|}|
|   }|t        |      dz
  k(  }t        || j                  dz   ||d | d||||	|      }| j                  j                  |       |}a t!        ||d
   |d         | _        |dk(  rt%        |d
   |	      | _        n t        j(                  |d
   |d      | _        t        j*                         | _        t        j                  |d
   |dd      | _        d| _        y )Nr;   r   r-   r.   ru   r2   rv   r<   )r$   r5   r7   r>   r?   r9   r8   r:   r   rw   )r   r   r   rA   rE   F)rF   rG   r(   rH   rI   rJ   rK   rz   r   rP   r{   r|   rM   rN   r   rO   r   condition_encoderr   rR   rQ   rS   rT   rU   rV   )rW   r$   r%   rr   r'   r(   r)   r*   rs   r:   r}   rY   rZ   r~   rx   r]   r   r`   s                    r   rG   zMaskConditionDecoder.__init__  s    	 0yyr"
 r*'0I'=4 (*2.  !1:g1EI91"5)'	
 '+84F+G&H#4Q7 ). 9 	1A}"08;N#&8"9A"==N#0014/+$(!//$-#1+(1H NN!!(+"0+	10 "6%a(%b)"
 	!!,-?-BM!RD!#;Ma;P]lrv!wD			"4Q"7qRST&+#r   zimager   r   ra   c                 j   |}| j                  |      }t        t        | j                  j	                                     j
                  }t        j                         r| j                  r| j                  | j                  ||      }|j                  |      }|'|%d|z
  |z  }| j                  | j                  ||      }| j                  D ]w  }	|`|^t        t        |j                                 }
t         j"                  j%                  ||j                  dd d      }||z  |
d|z
  z  z   }| j                  |	||      }y ||||z  t        t        |j                                 d|z
  z  z   }n| j                  ||      }|j                  |      }||d|z
  |z  }| j                  ||      }| j                  D ]m  }	|`|^t        t        |j                                 }
t         j"                  j%                  ||j                  dd d      }||z  |
d|z
  z  z   } |	||      }o |/|-||z  t        t        |j                                 d|z
  z  z   }|| j'                  |      }n| j'                  ||      }| j)                  |      }| j+                  |      }|S )z7The forward method of the `MaskConditionDecoder` class.Nr-   nearest)sizemode)rJ   r   r   rz   r   r   r   rc   rV   rd   rP   r   r   rl   r   r   rH   
functionalinterpolaterR   rT   rU   )rW   r   r   r   r   r   r   masked_imageim_xr   sample_mask_s               r   re   zMaskConditionDecoder.forward  s    f%T$..";";"=>?EE  "t'B'B66t~~v}]FYY}-F  T%5 !DE188**  !NN \$)9"3uV\\':#;<GMM55dbcARYb5cE#e^gU.CCF::8V][\  T%5$c%2E.F)G1t8)TT ^^FM:FYY}-F  T%5 !DE1--lDA !NN 9$)9"3uV\\':#;<GMM55dbcARYb5cE#e^gU.CCF!&-89  T%5$c%2E.F)G1t8)TT  ''/F''>Fv&v&r   )r   r   r   rg   r	   ri   rj   rv   )NNNr   ro   s   @r   r   r     s    0 *?.3 !! O,O, O, c3h	O,
 "#s(OO, O, O, O, O,h )-'+04?<<? %? u||$	?
  -? 
?r   r   c                   T    e Zd ZdZ	 	 	 	 ddedededededef fdZd	e	j                  d
e	j                  fdZd	e	j                  d
e	j                  fdZde	j                  d
ee	j                  e	j                  ef   fdZde	j                  deedf   d
e	j                  fdZ xZS )VectorQuantizerz
    Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly avoids costly matrix
    multiplications and allows for post-hoc remapping of indices.
    n_evq_embed_dimbetaunknown_indexsane_index_shapelegacyc           	      >   t         |           || _        || _        || _        || _        t        j                  | j                  | j                        | _        | j                  j                  j                  j                  d| j                  z  d| j                  z         || _        | j                  | j                  dt        j                  t!        j"                  | j                                     |  | j$                  j&                  d   | _        || _        | j*                  dk(  r%| j(                  | _        | j(                  dz   | _        t-        d| j                   d| j(                   d	| j*                   d
       || _        y || _        || _        y )Ng            ?usedr   extrar-   z
Remapping z indices to z indices. Using z for unknown indices.)rF   rG   r   r   r   r   rH   	Embedding	embeddingweightdatauniform_remapregister_bufferr   tensornploadr   r   re_embedr   printr   )	rW   r   r   r   r   r   r   r   r`   s	           r   rG   zVectorQuantizer.__init__B  sL    	(	dhh0A0AB""++D488OS488^L
::!  bggdjj6I)JK IIOOA.DM!.D!!W,%)]]" $ 1TXXJl4==/ B++,,AC !1  DM 0r   indsra   c                    |j                   }t        |      dkD  sJ |j                  |d   d      }| j                  j	                  |      }|d d d d d f   |d   k(  j                         }|j                  d      }|j                  d      dk  }| j                  dk(  rMt        j                  d| j                  ||   j                         j	                  |j                        ||<   n| j                  ||<   |j                  |      S )	Nr-   r   r;   )NN.r	   random)r   )device)r   rN   reshaper   r   longargmaxsumr   r   randintr   r   )rW   r   ishaper   matchnewunknowns          r   remap_to_usedzVectorQuantizer.remap_to_usedg  s    6{Q||F1Ir*yy||D!aDj!T/%::@@Bll2))A,") ==DMMG@R@RSVV^a^h^hViCL--CL{{6""r   c                    |j                   }t        |      dkD  sJ |j                  |d   d      }| j                  j	                  |      }| j
                  | j                  j                   d   kD  rd||| j                  j                   d   k\  <   t        j                  |d d d f   |j                   d   dgz  d d f   d|      }|j                  |      S )Nr-   r   r;   )r   rN   r   r   r   r   r   gather)rW   r   r   r   backs        r   unmap_to_allzVectorQuantizer.unmap_to_allu  s    6{Q||F1Ir*yy||D!==499??1--/0D++,||DqM$**Q-1#*=q*@A1dK||F##r   r   c                    |j                  dddd      j                         }|j                  d| j                        }t	        j
                  t	        j                  || j                  j                        d      }| j                  |      j                  |j                        }d }d }| j                  sa| j                  t	        j                  |j                         |z
  dz        z  t	        j                  ||j                         z
  dz        z   }n`t	        j                  |j                         |z
  dz        | j                  t	        j                  ||j                         z
  dz        z  z   }|||z
  j                         z   }|j                  dddd      j                         }| j                  B|j                  |j                  d   d      }| j!                  |      }|j                  dd      }| j"                  r:|j                  |j                  d   |j                  d   |j                  d         }|||||ffS )Nr   r	   r   r-   r;   dim)permute
contiguousviewr   r   argmincdistr   r   r   r   r   meandetachr   r   r   r   )rW   r   z_flattenedmin_encoding_indicesz_q
perplexitymin_encodingslosss           r   re   zVectorQuantizer.forward  s   IIaAq!,,.ffR!2!23  %||EKKT^^EZEZ,[abcnn1277@
 {{99uzz3::<!+;*ABBUZZQTWXW_W_WaQafgPgEhhD::szz|a/A56UZZQTWXW_W_WaQafgPgEh9hhD q 0 0 22 kk!Q1%002::!#7#?#?
B#O #'#5#56J#K #7#?#?A#F   #7#?#?		!ciiXYl\_\e\efg\h#i D:}6JKKKr   indicesr   .c                    | j                   7|j                  |d   d      }| j                  |      }|j                  d      }| j                  |      }|3|j	                  |      }|j                  dddd      j                         }|S )Nr   r;   r   r-   r	   )r   r   r   r   r   r   r   )rW   r   r   r   s       r   get_codebook_entryz"VectorQuantizer.get_codebook_entry  s    ::!ooeAh3G''0Goob)G !NN73((5/C++aAq)446C
r   )Nr   FT)r   r   r   r   rk   floatrl   rm   rG   r   
LongTensorr   r   r   r   re   r   rn   ro   s   @r   r   r   9  s     %!&#1#1 #1 	#1 #1 #1 #1J#%"2"2 #u7G7G #$!1!1 $e6F6F $ L  L%ellE0Q*R  LD%*:*: 5c? W\WcWc r   r   c                      e Zd Zddej                  defdZddeej                     dej                  fdZ	ddd dej                  fd	Z
g d
fdej                  deedf   dej                  fdZdej                  fdZy)DiagonalGaussianDistributionr   deterministicc                    || _         t        j                  |dd      \  | _        | _        t        j
                  | j                  dd      | _        || _        t        j                  d| j                  z        | _        t        j                  | j                        | _	        | j                  rWt        j                  | j                  | j                   j                  | j                   j                        x| _	        | _        y y )Nr	   r-   r   g      >g      4@      ?)r   r   )r   r   chunkr   logvarclampr   expstdvar
zeros_liker   r   )rW   r   r   s      r   rG   z%DiagonalGaussianDistribution.__init__  s    $!&Z!B	4;kk$++ud;*99S4;;./99T[[)"'"2"2		$//"8"8@U@U# DHtx r   N	generatorra   c                     t        | j                  j                  || j                  j                  | j                  j
                        }| j                  | j                  |z  z   }|S )N)r
  r   r   )r   r   r   r   r   r   r  )rW   r
  r   r   s       r   r   z#DiagonalGaussianDistribution.sample  sR    IIOO??))//''	
 II6))r   otherc                    | j                   rt        j                  dg      S |Wdt        j                  t        j                  | j
                  d      | j                  z   dz
  | j                  z
  g d      z  S dt        j                  t        j                  | j
                  |j
                  z
  d      |j                  z  | j                  |j                  z  z   dz
  | j                  z
  |j                  z   g d      z  S )N        r  r	   r   r-   r	   r   r   )r   r   r   r   powr   r  r  )rW   r  s     r   klzDiagonalGaussianDistribution.kl  s    <<&&}UYYIIdii+dhh6<t{{J!  
 UYYIIdii%**4a8599Dhh*+ kk" ll	#
 "  r   r  r   dims.c                 B   | j                   rt        j                  dg      S t        j                  dt        j
                  z        }dt        j                  || j                  z   t        j                  || j                  z
  d      | j                  z  z   |      z  S )Nr  g       @r  r	   r   )r   r   r   r   logpir   r  r  r   r  )rW   r   r  logtwopis       r   nllz DiagonalGaussianDistribution.nll  s{    <<&&66#+&UYYt{{"UYYv		/A1%E%PP
 
 	
r   c                     | j                   S r   )r   rW   s    r   r   z!DiagonalGaussianDistribution.mode  s    yyr   )Fr   )r   r   r   r   r   rm   rG   r   	Generatorr   r  r   rk   r  r   r   r   r   r   r     s    
5<< 
 
	 9 	U\\ 	6 %,, & AJ 
%,, 
eCHo 
ell 
ell r   r   c                       e Zd Zdej                  fdZddeej                     dej                  fdZdej                  fdZ	y)	IdentityDistributionr   c                     || _         y r   r   )rW   r   s     r   rG   zIdentityDistribution.__init__  s	    $r   Nr
  ra   c                     | j                   S r   r  )rW   r
  s     r   r   zIdentityDistribution.sample      r   c                     | j                   S r   r  r  s    r   r   zIdentityDistribution.mode  r   r   r   )
r   r   r   r   r   rG   r   r  r   r   r   r   r   r  r    sB    %5<< % 9 U\\ ell r   r  c            
            e Zd ZdZdededeedf   deedf   def
 fdZd	ej                  d
ej                  fdZ
 xZS )EncoderTinya  
    The `EncoderTiny` layer is a simpler version of the `Encoder` layer.

    Args:
        in_channels (`int`):
            The number of input channels.
        out_channels (`int`):
            The number of output channels.
        num_blocks (`Tuple[int, ...]`):
            Each value of the tuple represents a Conv2d layer followed by `value` number of `AutoencoderTinyBlock`'s to
            use.
        block_out_channels (`Tuple[int, ...]`):
            The number of output channels for each block.
        act_fn (`str`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
    r$   r%   
num_blocks.r'   r*   c                    t         |           g }t        |      D ]  \  }}||   }	|dk(  r)|j                  t	        j
                  ||	dd             n*|j                  t	        j
                  |	|	dddd             t        |      D ]  }
|j                  t        |	|	|                |j                  t	        j
                  |d   |dd             t	        j                  | | _	        d| _
        y )	Nr   r   r-   r/   r1   r	   F)r/   r1   r0   biasr;   )rF   rG   rM   rO   rH   rI   r   r   r   r   rV   )rW   r$   r%   r$  r'   r*   r   rZ   	num_blockrB   _r`   s              r   rG   zEncoderTiny.__init__  s     	%j1 	XLAy-a0LAvbii\qZ[\]II$$$% ! "	 9% X2<vVWX#	X( 	bii 22 6RS]^_`mmV,&+#r   r   ra   c                     t        j                         r*| j                  r| j                  | j                  |      }|S | j	                  |j                  d      j                  d            }|S )z.The forward method of the `EncoderTiny` class.r-   r	   )r   rc   rV   rd   r   adddivr   s     r   re   zEncoderTiny.forward,  sX      "t'B'B11$++qAA  AEE!HLLO,Ar   r   r   r   r   rk   r   rl   rG   r   r   re   rn   ro   s   @r   r#  r#    si    "",", ", #s(O	",
 "#s(O", ",H	 	%,, 	r   r#  c                        e Zd ZdZdededeedf   deedf   deded	ef fd
Zdej                  dej                  fdZ
 xZS )DecoderTinya  
    The `DecoderTiny` layer is a simpler version of the `Decoder` layer.

    Args:
        in_channels (`int`):
            The number of input channels.
        out_channels (`int`):
            The number of output channels.
        num_blocks (`Tuple[int, ...]`):
            Each value of the tuple represents a Conv2d layer followed by `value` number of `AutoencoderTinyBlock`'s to
            use.
        block_out_channels (`Tuple[int, ...]`):
            The number of output channels for each block.
        upsampling_scaling_factor (`int`):
            The scaling factor to use for upsampling.
        act_fn (`str`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
    r$   r%   r$  .r'   upsampling_scaling_factorr*   upsample_fnc           
         t         |           t        j                  ||d   dd      t	        |      g}t        |      D ]  \  }	}
|	t        |      dz
  k(  }||	   }t        |
      D ]  }|j                  t        |||               |s&|j                  t        j                  ||             |s|n|}|j                  t        j                  ||dd|              t        j                  | | _        d| _        y )Nr   r   r-   r&  )scale_factorr   )r/   r1   r'  F)rF   rG   rH   rI   r
   rM   rN   r   rO   r   Upsampler   r   rV   )rW   r$   r%   r$  r'   r0  r*   r1  r   rZ   r(  r]   rB   r)  conv_out_channelr`   s                  r   rG   zDecoderTiny.__init__L  s    	 IIk#5a#8aQRS6"

 &j1 	LAy3z?Q#67N-a0L9% X2<vVWX "bkk7PWbcd3A||MM		 $ !'	* mmV,&+#r   r   ra   c                    t        j                  |dz        dz  }t        j                         r)| j                  r| j	                  | j
                  |      }n| j                  |      }|j                  d      j                  d      S )z.The forward method of the `DecoderTiny` class.r   r	   r-   )r   tanhrc   rV   rd   r   mulsubr   s     r   re   zDecoderTiny.forwardu  sj     JJq1u!  "t'B'B11$++qAAAA uuQx||Ar   r-  ro   s   @r   r/  r/  8  s}    &',', ', #s(O	',
 "#s(O', $'', ', ',R %,, r   r/  )%dataclassesr   typingr   r   numpyr   r   torch.nnrH   utilsr   utils.torch_utilsr   activationsr
   attention_processorr   unets.unet_2d_blocksr   r   r   r   r   r   Moduler#   rq   r   r   r   r   objectr   r  r#  r/  r   r   r   <module>rE     s   " "     - ( -  	J 	 	 
4J 
4 
4vbii vrFbii FRryy 82299 2jg299 gTwbii wt56 5p6 ?")) ?DH")) Hr   