
    biKA                     B   d dl mZmZmZ d dlZd dlmZ d dlZd dl	m
Z d dlmZ ddlmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZ ej8                  j:                   G d de             Z G d dej>                        Z e G d dej>                  ee             Z!y)    )OptionalTupleUnionN)
FrozenDict   )ConfigMixinflax_register_to_config)
BaseOutput   )FlaxTimestepEmbeddingFlaxTimesteps)FlaxModelMixin)FlaxCrossAttnDownBlock2DFlaxDownBlock2DFlaxUNetMidBlock2DCrossAttnc                   N    e Zd ZU dZej
                  ed<   ej
                  ed<   y)FlaxControlNetOutputz
    The output of [`FlaxControlNetModel`].

    Args:
        down_block_res_samples (`jnp.ndarray`):
        mid_block_res_sample (`jnp.ndarray`):
    down_block_res_samplesmid_block_res_sampleN)__name__
__module____qualname____doc__jnpndarray__annotations__     g/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_flax.pyr   r   !   s      KK'++%r   r   c                       e Zd ZU eed<   dZeedf   ed<   ej                  Z	ej                  ed<   ddZ
d	ej                  dej                  fd
Zy)#FlaxControlNetConditioningEmbeddingconditioning_embedding_channels       `      .block_out_channelsdtypereturnNc                    t        j                  | j                  d   dd| j                        | _        g }t        t        | j                        dz
        D ]  }| j                  |   }| j                  |dz      }t        j                  |dd| j                        }|j                  |       t        j                  |ddd| j                        }|j                  |        || _        t        j                  | j                  ddt         j                  j                         t         j                  j                         | j                        | _        y )	Nr   r   r      r/   r.   )kernel_sizepaddingr)   r/   )r   r   r0   stridesr1   r)   r0   r1   kernel_init	bias_initr)   )nnConvr(   r)   conv_inrangelenappendblocksr"   initializers
zeros_initconv_out)selfr=   i
channel_inchannel_outconv1conv2s          r   setupz)FlaxControlNetConditioningEmbedding.setup4   s&   ww##A&$**	
 s4223a78 	!A003J11!a%8KGG"(jj	E MM% GG"(jjE MM% #	!$ 00$224oo002**
r   conditioningc                     | j                  |      }t        j                  |      }| j                  D ]  } ||      }t        j                  |      }! | j	                  |      }|S )N)r9   r7   silur=   r@   )rA   rH   	embeddingblocks       r   __call__z,FlaxControlNetConditioningEmbedding.__call__Z   s_    LL.	GGI&	[[ 	+Ei(I	*I	+ MM),	r   r*   N)r   r   r   intr   r(   r   r   float32r)   rG   r   rM   r   r   r   r!   r!   /   sN    %((*;c3h;{{E399"$
L
S[[ 
S[[ 
r   r!   c                   x   e Zd ZU dZdZeed<   dZeed<   dZe	e
df   ed<   d	Zeee	edf   f   ed
<   dZe	edf   ed<   dZeed<   dZeee	edf   f   ed<   dZeeee	edf   f      ed<   dZeed<   dZeed<   d	Zeed<   ej0                  Zej2                  ed<   dZeed<   dZeed<   dZe
ed<   dZe	edf   ed <   d!ej>                  d"e fd#Z!d-d$Z"	 	 	 d.d%ejF                  d&eejF                  eef   d'ejF                  d(ejF                  d)ed*ed+ed"ee$e	e	ejF                  df   ejF                  f   f   fd,Z%y)/FlaxControlNetModelu
  
    A ControlNet model.

    This model inherits from [`FlaxModelMixin`]. Check the superclass documentation for it’s generic methods
    implemented for all models (such as downloading or saving).

    This model is also a Flax Linen [`flax.linen.Module`](https://flax.readthedocs.io/en/latest/flax.linen.html#module)
    subclass. Use it as a regular Flax Linen module and refer to the Flax documentation for all matters related to its
    general usage and behavior.

    Inherent JAX features such as the following are supported:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        sample_size (`int`, *optional*):
            The size of the input sample.
        in_channels (`int`, *optional*, defaults to 4):
            The number of channels in the input sample.
        down_block_types (`Tuple[str]`, *optional*, defaults to `("FlaxCrossAttnDownBlock2D", "FlaxCrossAttnDownBlock2D", "FlaxCrossAttnDownBlock2D", "FlaxDownBlock2D")`):
            The tuple of downsample blocks to use.
        block_out_channels (`Tuple[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`):
            The tuple of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        attention_head_dim (`int` or `Tuple[int]`, *optional*, defaults to 8):
            The dimension of the attention heads.
        num_attention_heads (`int` or `Tuple[int]`, *optional*):
            The number of attention heads.
        cross_attention_dim (`int`, *optional*, defaults to 768):
            The dimension of the cross attention features.
        dropout (`float`, *optional*, defaults to 0):
            Dropout probability for down, up and bottleneck blocks.
        flip_sin_to_cos (`bool`, *optional*, defaults to `True`):
            Whether to flip the sin to cos in the time embedding.
        freq_shift (`int`, *optional*, defaults to 0): The frequency shift to apply to the time embedding.
        controlnet_conditioning_channel_order (`str`, *optional*, defaults to `rgb`):
            The channel order of conditional image. Will convert to `rgb` if it's `bgr`.
        conditioning_embedding_out_channels (`tuple`, *optional*, defaults to `(16, 32, 96, 256)`):
            The tuple of output channel for each block in the `conditioning_embedding` layer.
    r%   sample_size   in_channels)CrossAttnDownBlock2DrV   rV   DownBlock2D.down_block_typesFonly_cross_attention)i@  i     rZ   r(   r   layers_per_block   attention_head_dimNnum_attention_headsrZ   cross_attention_dimg        dropoutuse_linear_projectionr)   Tflip_sin_to_cosr   
freq_shiftrgb%controlnet_conditioning_channel_orderr#   #conditioning_embedding_out_channelsrngr*   c                 J   d| j                   | j                  | j                  f}t        j                  |t        j                        }t        j
                  dt        j                        }t        j                  dd| j                  ft        j                        }dd| j                  dz  | j                  dz  f}t        j                  |t        j                        }t        j                  j                  |      \  }}	||	d}
| j                  |
||||      d   S )Nr/   r)   )r/   r   r\   )paramsr`   rj   )rU   rS   r   zerosrP   onesint32r_   jaxrandomsplitinit)rA   rg   sample_shapesample	timestepsencoder_hidden_statescontrolnet_cond_shapecontrolnet_cond
params_rngdropout_rngrngss              r   init_weightsz FlaxControlNetModel.init_weights   s    4++T-=-=t?O?OP<s{{;HHT3	 #		1a1I1I*JRUR]R] ^!"At'7'7!';T=M=MPQ=Q R))$9M"%**"2"23"7
K$=yyvy2GYZbccr   c                    | j                   }|d   dz  }| j                  xs | j                  }t        j                  |d   ddd| j
                        | _        t        |d   | j                  | j                  j                        | _        t        || j
                        | _        t        |d   | j                  	      | _        | j"                  }t%        |t&              r|ft)        | j*                        z  }t%        |t,              r|ft)        | j*                        z  }g }g }|d   }t        j                  |dd
t        j.                  j1                         t        j.                  j1                         | j
                        }|j3                  |       t5        | j*                        D ]  \  }	}
|}||	   }|	t)        |      dz
  k(  }|
dk(  rDt7        ||| j8                  | j:                  ||	   | | j<                  ||	   | j
                  	      }n0t?        ||| j8                  | j:                  | | j
                        }|j3                  |       tA        | j:                        D ]p  }t        j                  |dd
t        j.                  j1                         t        j.                  j1                         | j
                        }|j3                  |       r |r5t        j                  |dd
t        j.                  j1                         t        j.                  j1                         | j
                        }|j3                  |        || _!        || _"        |d   }tG        || j8                  |d   | j<                  | j
                        | _$        t        j                  |dd
t        j.                  j1                         t        j.                  j1                         | j
                        | _%        y )Nr   rT   r,   r.   r-   r2   )rb   rc   ri   )r"   r(   VALIDr4   r/   rV   )	rU   out_channelsr`   
num_layersr^   add_downsamplera   rY   r)   )rU   r~   r`   r   r   r)   )rU   r`   r^   ra   r)   )&r(   r^   r]   r7   r8   r)   r9   r   rb   configrc   	time_projr   time_embeddingr!   rf   controlnet_cond_embeddingrY   
isinstanceboolr;   rX   rO   r>   r?   r<   	enumerater   r`   r[   ra   r   r:   down_blockscontrolnet_down_blocksr   	mid_blockcontrolnet_mid_block)rA   r(   time_embed_dimr^   rY   r   r   output_channelcontrolnet_blockrB   down_block_typeinput_channelis_final_block
down_block_mid_block_channels                   r   rG   zFlaxControlNetModel.setup   s   !44+A.2 #66Q$:Q:Q wwq!$**
 'q!43G3GTXT_T_TjTj
 4N$**U)L,>q,A#GG*
&
  $88*D1$8#:SAVAV=W#W )3/#6"83t?T?T;U"U !#+A.77224oo002**
 	%%&67"+D,A,A"B 1	@A*M/2N#&8"9A"==N"885 -!/ LL#44(;A(>'5#5*.*D*D)=a)@**

 - -!/ LL#44'5#5**
 z*4001 	@#%77" &# " : : < oo88:**$  '--.>?	@ "#%77" &# " : : < oo88:**$  '--.>?c1	@f '&<# /r24)LL 3B 7"&"<"<**
 %'GG224oo002**%
!r   rs   rt   ru   rw   conditioning_scalereturn_dicttrainc                 ,   | j                   }|dk(  rt        j                  |d      }t        |t        j                        s't        j
                  |gt        j                        }nht        |t        j                        rNt        |j                        dk(  r6|j                  t        j                        }t        j                  |d      }| j                  |      }	| j                  |	      }	t        j                  |d      }| j                  |      }t        j                  |d      }| j!                  |      }||z  }|f}
| j"                  D ]7  }t        |t$              r |||	||       \  }}n |||	|       \  }}|
|z  }
9 | j'                  ||	||       }d}t)        |
| j*                        D ]  \  }} ||      }||fz  } |}
| j-                  |      }|
D cg c]  }||z  	 }
}||z  }|s|
|fS t/        |
|	      S c c}w )
a  
        Args:
            sample (`jnp.ndarray`): (batch, channel, height, width) noisy inputs tensor
            timestep (`jnp.ndarray` or `float` or `int`): timesteps
            encoder_hidden_states (`jnp.ndarray`): (batch_size, sequence_length, hidden_size) encoder hidden states
            controlnet_cond (`jnp.ndarray`): (batch, channel, height, width) the conditional input tensor
            conditioning_scale (`float`, *optional*, defaults to `1.0`): the scale factor for controlnet outputs
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] instead of
                a plain tuple.
            train (`bool`, *optional*, defaults to `False`):
                Use deterministic functions and disable dropout when not training.

        Returns:
            [`~models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] or `tuple`:
                [`~models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] if `return_dict` is True, otherwise
                a `tuple`. When returning a tuple, the first element is the sample tensor.
        bgrr/   )axisri   r   )r   r   r   r/   )deterministicr   )r   r   )re   r   flipr   r   arrayrm   r;   shapeastyperP   expand_dimsr   r   	transposer9   r   r   r   r   zipr   r   r   )rA   rs   rt   ru   rw   r   r   r   channel_ordert_embr   r   res_samples!controlnet_down_block_res_samplesdown_block_res_sampler   r   s                    r   rM   zFlaxControlNetModel.__call__:  s2   8 BBE!!hhQ?O )S[[1		9+SYY?I	3;;/C	4HA4M!((s{{(;I	15Iy)##E* v|4f%--F88I/! #)** 	2J*&>?&0@Uinen&o#&0RWi&X#"k1"	2 /DX]T]^ -/)7:;QSWSnSn7o 	J3!#3$45J$K!-2G1II-	J "C#88@ Mc!c&&+="=!c!c 22*,@AA##9Pd
 	
 "ds   +HrN   )g      ?TF)&r   r   r   r   rS   rO   r   rU   rX   r   strrY   r   r   r(   r[   r]   r^   r   r_   r`   floatra   r   rP   r)   rb   rc   re   rf   rn   Arrayr   r{   rG   r   r   rM   r   r   r   rR   rR   g   s   +Z KK)eCHo  ;@%eD#I&6 67?*@c3h@c67c5c?237AE%U38_(<"=>E##GU"'4'{{E399" OT J16)36;L'sCxLd		 dj d~
L %( Q
Q
 eS01Q
  #{{	Q

 Q
 "Q
 Q
 Q
 
#U5c1A+BCKK+O%PP	QQ
r   rR   )"typingr   r   r   flax
flax.linenlinenr7   rn   	jax.numpynumpyr   flax.core.frozen_dictr   configuration_utilsr   r	   utilsr
   embeddings_flaxr   r   modeling_flax_utilsr   unets.unet_2d_blocks_flaxr   r   r   struct	dataclassr   Moduler!   rR   r   r   r   <module>r      s    * )   
  , G  B 0  
&: 
& 
&5")) 5p c
"))^[ c
 c
r   