
    bi|                        d dl Z d dlmZ d dlmZ d dlZd dlmZ d dl	Z	d dl
mZ d dlmZ ddlmZmZ ddlmZ dd	lmZ ej,                  j.                   G d
 de             Zej,                  j.                   G d de             Z G d dej4                        Z G d dej4                        Z G d dej4                        Z G d dej4                        Z G d dej4                        Z G d dej4                        Z  G d dej4                        Z! G d dej4                        Z" G d dej4                        Z# G d  d!e$      Z%e G d" d#ej4                  ee             Z&y)$    N)partial)Tuple)
FrozenDict   )ConfigMixinflax_register_to_config)
BaseOutput   )FlaxModelMixinc                   0    e Zd ZU dZej
                  ed<   y)FlaxDecoderOutputa;  
    Output of decoding method.

    Args:
        sample (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)`):
            The decoded output sample from the last layer of the model.
        dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`):
            The `dtype` of the parameters.
    sampleN)__name__
__module____qualname____doc__jnpndarray__annotations__     T/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/vae_flax.pyr   r       s     KKr   r   c                       e Zd ZU dZded<   y)FlaxAutoencoderKLOutputaL  
    Output of AutoencoderKL encoding method.

    Args:
        latent_dist (`FlaxDiagonalGaussianDistribution`):
            Encoded outputs of `Encoder` represented as the mean and logvar of `FlaxDiagonalGaussianDistribution`.
            `FlaxDiagonalGaussianDistribution` allows for sampling latents from the distribution.
     FlaxDiagonalGaussianDistributionlatent_distN)r   r   r   r   r   r   r   r   r   r   /   s     43r   r   c                   ^    e Zd ZU dZeed<   ej                  Zej                  ed<   d Z	d Z
y)FlaxUpsample2Dz
    Flax implementation of 2D Upsample layer

    Args:
        in_channels (`int`):
            Input channels
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    in_channelsdtypec                 j    t        j                  | j                  ddd| j                        | _        y )N   r#   r
   r
   r$   r$   kernel_sizestridespaddingr    nnConvr   r    convselfs    r   setupzFlaxUpsample2D.setupK   s+    GG$**
	r   c                     |j                   \  }}}}t        j                  j                  |||dz  |dz  |fd      }| j	                  |      }|S )Nr   nearest)shapemethod)r3   jaximageresizer-   )r/   hidden_statesbatchheightwidthchannelss         r   __call__zFlaxUpsample2D.__call__T   s_    )6)<)<&vuh		((&1*eai: ) 

 		-0r   Nr   r   r   r   intr   r   float32r    r0   r=   r   r   r   r   r   =   s+     {{E399"
r   r   c                   ^    e Zd ZU dZeed<   ej                  Zej                  ed<   d Z	d Z
y)FlaxDownsample2Dz
    Flax implementation of 2D Downsample layer

    Args:
        in_channels (`int`):
            Input channels
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    r   r    c                 j    t        j                  | j                  ddd| j                        | _        y )Nr"   )r   r   VALIDr&   r*   r.   s    r   r0   zFlaxDownsample2D.setupm   s+    GG**
	r   c                 Z    d}t        j                  ||      }| j                  |      }|S )N)r   r   r   r
   rG   rF   )	pad_width)r   padr-   )r/   r8   rI   s      r   r=   zFlaxDownsample2D.__call__v   s+    .=		-0r   Nr>   r   r   r   rB   rB   _   s+     {{E399"
r   rB   c                       e Zd ZU dZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed	<   ej                  Zej                  ed
<   d ZddZy)FlaxResnetBlock2Da  
    Flax implementation of 2D Resnet Block.

    Args:
        in_channels (`int`):
            Input channels
        out_channels (`int`):
            Output channels
        dropout (:obj:`float`, *optional*, defaults to 0.0):
            Dropout rate
        groups (:obj:`int`, *optional*, defaults to `32`):
            The number of groups to use for group norm.
        use_nin_shortcut (:obj:`bool`, *optional*, defaults to `None`):
            Whether to use `nin_shortcut`. This activates a new layer inside ResNet block
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    r   Nout_channels        dropout    groupsuse_nin_shortcutr    c                    | j                   | j                  n| j                   }t        j                  | j                  d      | _        t        j                  |ddd| j                        | _        t        j                  | j                  d      | _	        t        j                  | j                        | _        t        j                  |ddd| j                        | _        | j                  | j                  |k7  n| j                  }d | _        |r*t        j                  |ddd| j                        | _        y y )Nư>
num_groupsepsilonr"   r$   r%   r&   rD   )rL   r   r+   	GroupNormrP   norm1r,   r    conv1norm2DropoutrN   dropout_layerconv2rQ   conv_shortcut)r/   rL   rQ   s      r   r0   zFlaxResnetBlock2D.setup   s   +/+<+<+Dt''$J[J[\\T[[$G
WW$**

 \\T[[$G
ZZ5WW$**

 @D?T?T?\4++|;bfbwbw!!#"jj"D r   c                 J   |}| j                  |      }t        j                  |      }| j                  |      }| j	                  |      }t        j                  |      }| j                  ||      }| j                  |      }| j                  | j                  |      }||z   S N)rX   r+   swishrY   rZ   r\   r]   r^   )r/   r8   deterministicresiduals       r   r=   zFlaxResnetBlock2D.__call__   s     

=1/

=1

=1/**=-H

=1)))(3Hx''r   T)r   r   r   r   r?   r   rL   rN   floatrP   rQ   boolr   r@   r    r0   r=   r   r   r   rK   rK   }   sU    $ L#GUFC!d!{{E399" D(r   rK   c                       e Zd ZU dZeed<   dZeed<   dZeed<   ej                  Z
ej                  ed<   d Zd	 Zd
 Zy)FlaxAttentionBlocka  
    Flax Convolutional based multi-head attention block for diffusion-based VAE.

    Parameters:
        channels (:obj:`int`):
            Input channels
        num_head_channels (:obj:`int`, *optional*, defaults to `None`):
            Number of attention heads
        num_groups (:obj:`int`, *optional*, defaults to `32`):
            The number of groups to use for group norm
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`

    r<   Nnum_head_channelsrO   rU   r    c                 j   | j                   | j                  | j                   z  nd| _        t        t        j
                  | j                  | j                        }t	        j                  | j                  d      | _	         |        |        |       c| _
        | _        | _         |       | _        y )Nr
   r    rS   rT   )ri   r<   	num_headsr   r+   Denser    rW   rU   
group_normquerykeyvalue	proj_attn)r/   denses     r   r0   zFlaxAttentionBlock.setup   s{    DHDZDZDf$*@*@@lm$--tzzB,,$//4P+07EGUW(
DHdjr   c                     |j                   d d | j                  dfz   }|j                  |      }t        j                  |d      }|S )Nr   r   r
   r#   )r3   rl   reshaper   	transpose)r/   
projectionnew_projection_shapenew_projections       r   transpose_for_scoresz'FlaxAttentionBlock.transpose_for_scores   sJ    )//47KK#++,@A~|Dr   c                 @   |}|j                   \  }}}}| j                  |      }|j                  |||z  |f      }| j                  |      }| j	                  |      }| j                  |      }	| j                  |      }| j                  |      }| j                  |	      }	dt        j                  t        j                  | j                  | j                  z              z  }
t        j                  d||
z  ||
z        }t        j                  |d      }t        j                  d|	|      }t        j                  |d      }|j                   d d | j                  fz   }|j                  |      }| j!                  |      }|j                  ||||f      }||z   }|S )Nr
   z...qc,...kc->...qkru   axisz...kc,...qk->...qcrv   )r3   rn   rw   ro   rp   rq   r|   mathsqrtr<   rl   r   einsumr+   softmaxrx   rr   )r/   r8   rc   r9   r:   r;   r<   ro   rp   rq   scaleattn_weightsnew_hidden_states_shapes                r   r=   zFlaxAttentionBlock.__call__   s    )6)<)<&vuh6%--ufunh.OP

=)hh}%

=) ))%0'',))%0 DIIdii(FGHHzz"6sU{Szz,R8 

#7Mm\B"/"5"5cr":dmm=M"M%--.EF}5%--ufeX.NO%0r   )r   r   r   r   r?   r   ri   rU   r   r@   r    r0   r|   r=   r   r   r   rh   rh      sD     M!s!J{{E399"! r   rh   c                       e Zd ZU dZeed<   eed<   dZeed<   dZeed<   dZ	eed	<   d
Z
eed<   ej                  Zej                  ed<   d ZddZy)FlaxDownEncoderBlock2Da  
    Flax Resnet blocks-based Encoder block for diffusion-based VAE.

    Parameters:
        in_channels (:obj:`int`):
            Input channels
        out_channels (:obj:`int`):
            Output channels
        dropout (:obj:`float`, *optional*, defaults to 0.0):
            Dropout rate
        num_layers (:obj:`int`, *optional*, defaults to 1):
            Number of Resnet layer block
        resnet_groups (:obj:`int`, *optional*, defaults to `32`):
            The number of groups to use for the Resnet block group norm
        add_downsample (:obj:`bool`, *optional*, defaults to `True`):
            Whether to add downsample layer
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    r   rL   rM   rN   r
   
num_layersrO   resnet_groupsTadd_downsampler    c                 |   g }t        | j                        D ]h  }|dk(  r| j                  n| j                  }t	        || j                  | j
                  | j                  | j                        }|j                  |       j || _	        | j                  r't        | j                  | j                        | _        y y Nr   r   rL   rN   rP   r    rk   )ranger   r   rL   rK   rN   r   r    appendresnetsr   rB   downsamplers_0r/   r   ir   	res_blocks        r   r0   zFlaxDownEncoderBlock2D.setup0  s    t' 
	&A./1f$**$:K:KK)'!..))jjI NN9%
	& "243D3DDJJ"WD r   c                 v    | j                   D ]  } |||      } | j                  r| j                  |      }|S Nrb   )r   r   r   r/   r8   rb   resnets       r   r=   zFlaxDownEncoderBlock2D.__call__B  sD    ll 	OF"=NM	O  //>Mr   Nrd   )r   r   r   r   r?   r   rN   re   r   r   r   rf   r   r@   r    r0   r=   r   r   r   r   r     sZ    ( GUJM3ND{{E399"X$r   r   c                       e Zd ZU dZeed<   eed<   dZeed<   dZeed<   dZ	eed	<   d
Z
eed<   ej                  Zej                  ed<   d ZddZy)FlaxUpDecoderBlock2Da  
    Flax Resnet blocks-based Decoder block for diffusion-based VAE.

    Parameters:
        in_channels (:obj:`int`):
            Input channels
        out_channels (:obj:`int`):
            Output channels
        dropout (:obj:`float`, *optional*, defaults to 0.0):
            Dropout rate
        num_layers (:obj:`int`, *optional*, defaults to 1):
            Number of Resnet layer block
        resnet_groups (:obj:`int`, *optional*, defaults to `32`):
            The number of groups to use for the Resnet block group norm
        add_upsample (:obj:`bool`, *optional*, defaults to `True`):
            Whether to add upsample layer
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    r   rL   rM   rN   r
   r   rO   r   Tadd_upsampler    c                 |   g }t        | j                        D ]h  }|dk(  r| j                  n| j                  }t	        || j                  | j
                  | j                  | j                        }|j                  |       j || _	        | j                  r't        | j                  | j                        | _        y y r   )r   r   r   rL   rK   rN   r   r    r   r   r   r   upsamplers_0r   s        r   r0   zFlaxUpDecoderBlock2D.setupi  s    t' 		&A./1f$**$:K:KK)'!..))jjI NN9%		&  .t/@/@

 SD r   c                 v    | j                   D ]  } |||      } | j                  r| j                  |      }|S r   )r   r   r   r   s       r   r=   zFlaxUpDecoderBlock2D.__call__{  sD    ll 	OF"=NM	O  --m<Mr   Nrd   )r   r   r   r   r?   r   rN   re   r   r   r   rf   r   r@   r    r0   r=   r   r   r   r   r   L  sZ    ( GUJM3L${{E399"T$r   r   c                       e Zd ZU dZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed	<   ej                  Zej                  ed
<   d ZddZy)FlaxUNetMidBlock2Da  
    Flax Unet Mid-Block module.

    Parameters:
        in_channels (:obj:`int`):
            Input channels
        dropout (:obj:`float`, *optional*, defaults to 0.0):
            Dropout rate
        num_layers (:obj:`int`, *optional*, defaults to 1):
            Number of Resnet layer block
        resnet_groups (:obj:`int`, *optional*, defaults to `32`):
            The number of groups to use for the Resnet and Attention block group norm
        num_attention_heads (:obj:`int`, *optional*, defaults to `1`):
            Number of attention heads for each attention block
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    r   rM   rN   r
   r   rO   r   num_attention_headsr    c                 :   | j                   | j                   nt        | j                  dz  d      }t        | j                  | j                  | j                  || j
                        g}g }t        | j                        D ]  }t        | j                  | j                  || j
                        }|j                  |       t        | j                  | j                  | j                  || j
                        }|j                  |        || _        || _        y )N   rO   r   )r<   ri   rU   r    )r   minr   rK   rN   r    r   r   rh   r   r   r   
attentions)r/   r   r   r   _
attn_blockr   s          r   r0   zFlaxUNetMidBlock2D.setup  s   .2.@.@.L**RUVZVfVfjkVkmoRp  ,,!--$jj
 
t' 	&A+))"&":":(jj	J j)) ,,!--$jjI NN9%!	&$ $r   c                      | j                   d   ||      }t        | j                  | j                   dd        D ]  \  }} ||      } |||      } |S )Nr   r   r
   )r   zipr   )r/   r8   rb   attnr   s        r   r=   zFlaxUNetMidBlock2D.__call__  sa    'Q]Sab1AB 	OLD& /M"=NM	O r   Nrd   )r   r   r   r   r?   r   rN   re   r   r   r   r   r@   r    r0   r=   r   r   r   r   r     sU    $ GUJM3  {{E399"#%Jr   r   c                       e Zd ZU dZdZeed<   dZeed<   dZe	e
   ed<   dZe	e   ed<   d	Zeed
<   dZeed<   dZe
ed<   dZeed<   ej$                  Zej&                  ed<   d ZddefdZy)FlaxEncodera  
    Flax Implementation of VAE Encoder.

    This model is a Flax Linen [flax.linen.Module](https://flax.readthedocs.io/en/latest/flax.linen.html#module)
    subclass. Use it as a regular Flax linen Module and refer to the Flax documentation for all matter related to
    general usage and behavior.

    Finally, this model supports inherent JAX features such as:
    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        in_channels (:obj:`int`, *optional*, defaults to 3):
            Input channels
        out_channels (:obj:`int`, *optional*, defaults to 3):
            Output channels
        down_block_types (:obj:`Tuple[str]`, *optional*, defaults to `(DownEncoderBlock2D)`):
            DownEncoder block type
        block_out_channels (:obj:`Tuple[str]`, *optional*, defaults to `(64,)`):
            Tuple containing the number of output channels for each block
        layers_per_block (:obj:`int`, *optional*, defaults to `2`):
            Number of Resnet layer for each block
        norm_num_groups (:obj:`int`, *optional*, defaults to `32`):
            norm num group
        act_fn (:obj:`str`, *optional*, defaults to `silu`):
            Activation function
        double_z (:obj:`bool`, *optional*, defaults to `False`):
            Whether to double the last output channels
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    r#   r   rL   DownEncoderBlock2Ddown_block_types@   block_out_channelsr   layers_per_blockrO   norm_num_groupssiluact_fnFdouble_zr    c           	         | j                   }t        j                  |d   ddd| j                        | _        g }|d   }t        | j                        D ]^  \  }}|}||   }|t        |      dz
  k(  }t        ||| j                  | j                  | | j                        }|j                  |       ` || _        t        |d   | j                  d | j                  	      | _        | j                  rd
| j                   z  n| j                   }	t        j"                  | j                  d      | _        t        j                  |	ddd| j                        | _        y )Nr   r"   r$   r%   r&   r
   )r   rL   r   r   r   r    ru   r   r   r   r    r   rS   rT   )r   r+   r,   r    conv_in	enumerater   lenr   r   r   r   down_blocksr   	mid_blockr   rL   rW   conv_norm_outconv_out)
r/   r   r   output_channelr   r   input_channelis_final_block
down_blockconv_out_channelss
             r   r0   zFlaxEncoder.setup  sY   !44wwq!$**
 +A.d334 	+DAq*M/2N#&8"9A"==N/)+00"22#11jjJ z*	+ ' ,*2... $**	
 6:]]A 1 11HYHY\\T5I5ISWX$**
r   rb   c                     | j                  |      }| j                  D ]  } |||      } | j                  ||      }| j                  |      }t	        j
                  |      }| j                  |      }|S r   )r   r   r   r   r+   ra   r   r/   r   rb   blocks       r   r=   zFlaxEncoder.__call__+  s|    f% %% 	@E6?F	@ mD ##F+&!v&r   Nrd   )r   r   r   r   r   r?   r   rL   r   r   strr   r   r   r   r   rf   r   r@   r    r0   r=   r   r   r   r   r     s     D KL##:eCj:%*c
*cOSFCHd{{E399"/
bd r   r   c                       e Zd ZU dZdZeed<   dZeed<   dZe	e
   ed<   dZeed<   d	Zeed
<   dZeed<   dZe
ed<   ej                   Zej"                  ed<   d ZddefdZy)FlaxDecodera  
    Flax Implementation of VAE Decoder.

    This model is a Flax Linen [flax.linen.Module](https://flax.readthedocs.io/en/latest/flax.linen.html#module)
    subclass. Use it as a regular Flax linen Module and refer to the Flax documentation for all matter related to
    general usage and behavior.

    Finally, this model supports inherent JAX features such as:
    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        in_channels (:obj:`int`, *optional*, defaults to 3):
            Input channels
        out_channels (:obj:`int`, *optional*, defaults to 3):
            Output channels
        up_block_types (:obj:`Tuple[str]`, *optional*, defaults to `(UpDecoderBlock2D)`):
            UpDecoder block type
        block_out_channels (:obj:`Tuple[str]`, *optional*, defaults to `(64,)`):
            Tuple containing the number of output channels for each block
        layers_per_block (:obj:`int`, *optional*, defaults to `2`):
            Number of Resnet layer for each block
        norm_num_groups (:obj:`int`, *optional*, defaults to `32`):
            norm num group
        act_fn (:obj:`str`, *optional*, defaults to `silu`):
            Activation function
        double_z (:obj:`bool`, *optional*, defaults to `False`):
            Whether to double the last output channels
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            parameters `dtype`
    r#   r   rL   UpDecoderBlock2Dup_block_typesr   r   r   r   rO   r   r   r   r    c           	         | j                   }t        j                  |d   ddd| j                        | _        t        |d   | j                  d | j                        | _        t        t        |            }|d   }g }t        | j                        D ]c  \  }}|}||   }|t        |      dz
  k(  }t        ||| j                  dz   | j                  | | j                  	      }	|j                  |	       |}e || _        t        j"                  | j                  d
      | _        t        j                  | j&                  ddd| j                        | _        y )Nru   r"   r$   r%   r&   r   r   r
   )r   rL   r   r   r   r    rS   rT   )r   r+   r,   r    r   r   r   r   listreversedr   r   r   r   r   r   	up_blocksrW   r   rL   r   )
r/   r   reversed_block_out_channelsr   r   r   r   prev_output_channelr   up_blocks
             r   r0   zFlaxDecoder.setupj  s^   !44 wwr"$**
 ,*2... $**	
 '+84F+G&H#4Q7	d112 	1DAq"08;N#&8"9A"==N+/+0014"22!//jjH X&"0	1" #  \\T5I5ISWX$**
r   rb   c                     | j                  |      }| j                  ||      }| j                  D ]  } |||      } | j                  |      }t	        j
                  |      }| j                  |      }|S r   )r   r   r   r   r+   ra   r   r   s       r   r=   zFlaxDecoder.__call__  sz    f% mD ^^ 	@E6?F	@ ##F+&!v&r   Nrd   )r   r   r   r   r   r?   r   rL   r   r   r   r   r   r   r   r   r@   r    r0   rf   r=   r   r   r   r   r   >  sz     D KL#!6NE#J6##cOSFC{{E399"3
jd r   r   c                   6    e Zd ZddZd Zd	dZg dfdZd Zy)
r   c                    t        j                  |dd      \  | _        | _        t        j                  | j                  dd      | _        || _        t        j                  d| j                  z        | _        t        j                  | j                        | _        | j
                  r,t        j                  | j                        x| _        | _        y y )Nr   ru   r~   g      >g      4@      ?)
r   splitmeanlogvarcliprb   expstdvar
zeros_like)r/   
parametersrb   s      r   __init__z)FlaxDiagonalGaussianDistribution.__init__  s    !$:qr!B	4;hht{{E48*773,-774;;'"%..";;DHtx r   c                     | j                   | j                  t        j                  j	                  || j                   j
                        z  z   S r`   )r   r   r5   randomnormalr3   )r/   rp   s     r   r   z'FlaxDiagonalGaussianDistribution.sample  s3    yy488cjj&7&7TYY__&MMMMr   Nc                    | j                   rt        j                  dg      S |Fdt        j                  | j                  dz  | j
                  z   dz
  | j                  z
  g d      z  S dt        j                  t        j                  | j                  |j                  z
        |j
                  z  | j
                  |j
                  z  z   dz
  | j                  z
  |j                  z   g d      z  S )NrM   r   r   g      ?r
   r   r#   r~   )rb   r   arraysumr   r   r   square)r/   others     r   klz#FlaxDiagonalGaussianDistribution.kl  s    99cU##=A!83!>!LS\]]]SWWJJtyy5::-.:TXX		=QQTWWZ^ZeZeehmhthtt
 
 	
r   r   c                 @   | j                   rt        j                  dg      S t        j                  dt        j                  z        }dt        j
                  || j                  z   t        j                  || j                  z
        | j                  z  z   |      z  S )NrM   g       @r   r~   )
rb   r   r   logpir   r   r   r   r   )r/   r   r   logtwopis       r   nllz$FlaxDiagonalGaussianDistribution.nll  su    99cU##773<(SWWX3cjj$))AS6TW[W_W_6__fjkkkr   c                     | j                   S r`   )r   r.   s    r   modez%FlaxDiagonalGaussianDistribution.mode  s    yyr   )Fr`   )r   r   r   r   r   r   r   r   r   r   r   r   r     s"    <N

  ) lr   r   c                   T   e Zd ZU dZdZeed<   dZeed<   dZe	e
   ed<   dZe	e
   ed<   d	Ze	e   ed
<   dZeed<   dZe
ed<   dZeed<   dZeed<   dZeed<   dZeed<   ej*                  Zej,                  ed<   d Zdej2                  defdZd!dedefdZd!dedefdZd"dedefdZy )#FlaxAutoencoderKLa  
    Flax implementation of a VAE model with KL loss for decoding latent representations.

    This model inherits from [`FlaxModelMixin`]. Check the superclass documentation for it's generic methods
    implemented for all models (such as downloading or saving).

    This model is a Flax Linen [flax.linen.Module](https://flax.readthedocs.io/en/latest/flax.linen.html#module)
    subclass. Use it as a regular Flax Linen module and refer to the Flax documentation for all matter related to its
    general usage and behavior.

    Inherent JAX features such as the following are supported:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        in_channels (`int`, *optional*, defaults to 3):
            Number of channels in the input image.
        out_channels (`int`, *optional*, defaults to 3):
            Number of channels in the output.
        down_block_types (`Tuple[str]`, *optional*, defaults to `(DownEncoderBlock2D)`):
            Tuple of downsample block types.
        up_block_types (`Tuple[str]`, *optional*, defaults to `(UpDecoderBlock2D)`):
            Tuple of upsample block types.
        block_out_channels (`Tuple[str]`, *optional*, defaults to `(64,)`):
            Tuple of block output channels.
        layers_per_block (`int`, *optional*, defaults to `2`):
            Number of ResNet layer for each block.
        act_fn (`str`, *optional*, defaults to `silu`):
            The activation function to use.
        latent_channels (`int`, *optional*, defaults to `4`):
            Number of channels in the latent space.
        norm_num_groups (`int`, *optional*, defaults to `32`):
            The number of groups for normalization.
        sample_size (`int`, *optional*, defaults to 32):
            Sample input size.
        scaling_factor (`float`, *optional*, defaults to 0.18215):
            The component-wise standard deviation of the trained latent space computed using the first batch of the
            training set. This is used to scale the latent space to have unit variance when training the diffusion
            model. The latents are scaled with the formula `z = z * scaling_factor` before being passed to the
            diffusion model. When decoding, the latents are scaled back to the original scale with the formula: `z = 1
            / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution Image
            Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper.
        dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`):
            The `dtype` of the parameters.
    r#   r   rL   r   r   r   r   r   r   r
   r   r   r   r   latent_channelsrO   r   sample_sizeg{P?scaling_factorr    c                    t        | j                  j                  | j                  j                  | j                  j                  | j                  j
                  | j                  j                  | j                  j                  | j                  j                  d| j                  	      | _
        t        | j                  j                  | j                  j                  | j                  j                  | j                  j
                  | j                  j                  | j                  j                  | j                  j                  | j                        | _        t        j                   d| j                  j                  z  ddd| j                        | _        t        j                   | j                  j                  ddd| j                        | _        y )NT)	r   rL   r   r   r   r   r   r   r    )r   rL   r   r   r   r   r   r    r   r$   rD   r&   )r   configr   r   r   r   r   r   r   r    encoderr   rL   r   decoderr+   r,   
quant_convpost_quant_convr.   s    r   r0   zFlaxAutoencoderKL.setup  sG   "//44![[99#{{==![[99;;%% KK77**

 #3311;;55#{{==![[99 KK77;;%%**	
 ''+++**
  "wwKK''** 
r   rngreturnc                    d| j                   | j                  | j                  f}t        j                  |t        j                        }t
        j                  j                  |d      \  }}}|||d}| j                  ||      d   S )Nr
   rk   r#   )paramsrN   gaussianr  )	r   r   r   zerosr@   r5   r   r   init)r/   r  sample_shaper   
params_rngdropout_rnggaussian_rngrngss           r   init_weightszFlaxAutoencoderKL.init_weights;  su    4++T-=-=t?O?OP<s{{;03

0@0@a0H-
K$,Wyyv&x00r   rb   return_dictc                     t        j                  |d      }| j                  ||      }| j                  |      }t	        |      }|s|fS t        |      S )Nr   r   r#   r
   r   )r   )r   rx   r   r  r   r   )r/   r   rb   r  r8   moments	posteriors          r   encodezFlaxAutoencoderKL.encodeE  sR    v|4V=I//-04W=	<&9==r   c                    |j                   d   | j                  j                  k7  rt        j                  |d      }| j                  |      }| j                  ||      }t        j                  |d      }|s|fS t        |      S )Nru   r  r   )r   r#   r
   r   r   )r3   r   r   r   rx   r  r   r   )r/   latentsrb   r  r8   s        r   decodezFlaxAutoencoderKL.decodeQ  sv    == ; ;;mmG\:G,,W5]-Pm\B!## 66r   c                    | j                  |||      }|r-| j                  d      }|j                  j                  |      }n|j                  j	                         }| j                  ||      j                  }|s|fS t        |      S )N)rb   r  r  )r  r  )r  make_rngr   r   r   r  r   )r/   r   sample_posteriorrb   r  r  r  r8   s           r   r=   zFlaxAutoencoderKL.__call___  s    KKmQ\K]	--
+C%1188=M%11668M]DKK9 //r   N)TT)FTT) r   r   r   r   r   r?   r   rL   r   r   r   r   r   r   r   r   r   r   r   re   r   r@   r    r0   r5   Arrayr   r  rf   r  r  r=   r   r   r   r   r     s    /b KL##:eCj:!6NE#J6%*c
*cFCOSOSK#NE#{{E399"#
J1		 1j 1
>D 
>d 
>7T 7t 70d 0`d 0r   r   )'r   	functoolsr   typingr   flax
flax.linenlinenr+   r5   	jax.numpynumpyr   flax.core.frozen_dictr   configuration_utilsr   r   utilsr	   modeling_flax_utilsr   struct	dataclassr   r   Moduler   rB   rK   rh   r   r   r   r   r   objectr   r   r   r   r   <module>r-     sG  "      
  , F  / 
   
4j 
4 
4RYY Dryy <J(		 J(ZF FR6RYY 6r6299 6rE EPn")) nbp")) pf"v "J U0		>; U0 U0r   