
    biKC                        d dl mZmZ d dlZd dlmc mZ d dlmZ d dlm	Z	 ddl
mZmZ ddlmZ dd	lmZmZ dd
lmZmZmZmZmZ ddlmZmZ ddlmZ ddlmZmZ ddl m!Z!m"Z"  G d deee      Z# G d dejH                        Z% G d dejH                        Z& G d dejH                        Z' G d dejH                        Z(y)    )DictUnionN)nn
checkpoint   )ConfigMixinregister_to_config)PeftAdapterMixin   )BasicTransformerBlockSkipFFTransformerBlock)ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORSAttentionProcessorAttnAddedKVProcessorAttnProcessor)TimestepEmbeddingget_timestep_embedding)
ModelMixin)GlobalResponseNormRMSNorm)Downsample2D
Upsample2Dc            .           e Zd ZdZe	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddededededededed	ed
edededededededededededededef, fd       Zd dZ	e
deeef   fd       Zdeeeeef   f   fdZd Z xZS )!UVit2DModelThidden_sizeuse_biashidden_dropoutcond_embed_dimmicro_cond_encode_dimmicro_cond_embed_dimencoder_hidden_size
vocab_sizecodebook_sizein_channelsblock_out_channelsnum_res_blocks
downsampleupsampleblock_num_headsnum_hidden_layersnum_attention_headsattention_dropoutintermediate_sizelayer_norm_epsln_elementwise_affinesample_sizec                    t         |           t        j                  |||      | _        t        |||      | _        t        |
|||||      | _        t        ||z   ||      | _
        t        ||||||||||d      | _        t        |||      | _        t        j                  |||      | _        t        j                  t!        |      D cg c]  }t#        ||||z  |||d|||||||        c}      | _        t        |||      | _        t        j                  |||      | _        t        |||||||||d|      | _        t-        ||
||||	      | _        d| _        y c c}w )Nbias)sample_proj_biasFada_norm_continuous)dimr-   attention_head_dimdropoutcross_attention_dimattention_bias	norm_type-ada_norm_continous_conditioning_embedding_dimnorm_elementwise_affinenorm_epsada_norm_biasff_inner_dimff_biasattention_out_bias)r)   r*   )super__init__r   Linearencoder_projr   encoder_proj_layer_normUVit2DConvEmbedembedr   
cond_embed	UVitBlock
down_blockproject_to_hidden_normproject_to_hidden
ModuleListranger   transformer_layersproject_from_hidden_normproject_from_hiddenup_blockConvMlmLayer	mlm_layergradient_checkpointing)selfr   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   _	__class__s                           Y/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/unets/uvit_2d.pyrF   zUVit2DModel.__init__*   s   D 	II&9;XV'.{NLa'b$$+Z9NP^`h

 , >1;QY
 $!
 '..@.Rg&h#!#+={QY!Z"$--$ 01#" ! &#(;'26I'I*(3#+3BM,A+"*!2$'/#
, )0^Mb(c%#%99[:LS[#\ !!
 &X7Ln^k
 ',#Us   #E%c                    | j                  |      }| j                  |      }t        |j                         | j                  j
                  dd      }|j                  |j                  d   df      }t        j                  ||gd      }|j                  | j                        }| j                  |      j                  |j                        }| j                  |      }| j                  ||||      }|j                  \  }}	}
}|j                  dd	d
d      j                  ||
|z  |	      }| j!                  |      }| j#                  |      }| j$                  D ]8  t        j&                         r| j(                  rfd}n} ||||d|i      }: | j+                  |      }| j-                  |      }|j                  ||
||	      j                  dd
dd	      }| j/                  ||||      }| j1                  |      }|S )NTr   )flip_sin_to_cosdownscale_freq_shift   r8   )dtype)pooled_text_embencoder_hidden_statescross_attention_kwargsr   r   c                      t        g|  S Nr   )argslayers    r]   layer_z#UVit2DModel.forward.<locals>.layer_   s    %e3d33    re   )rf   rg   added_cond_kwargs)rH   rI   r   flattenconfigr!   reshapeshapetorchcattord   rL   rK   rN   permuterO   rP   rS   is_grad_enabledrY   rT   rU   rV   rX   )rZ   	input_idsrf   re   micro_condsrg   micro_cond_embedshidden_states
batch_sizechannelsheightwidthrl   logitsrk   s                 @r]   forwardzUVit2DModel.forward   s    $ 1 12G H $ < <=R S2!4;;#D#DVZqr
 .55yq7I26NO))_6G$HaP),,4::,>///:==>S>Y>YZ

9-+"7#9	 ( 
 /<.A.A+
Hfe%--aAq9AA*fW\n^fg33MB..}=,, 	E$$&4+F+F4 "&;'=#4o"F	M	  55mD00?%--j&%RZZ[\^_abdef+"7#9	 & 
 .rm   returnc                     i }dt         dt        j                  j                  dt        t         t
        f   ffd| j                         D ]  \  }} |||        |S )z
        Returns:
            `dict` of attention processors: A dictionary containing all attention processors used in the model with
            indexed by its weight name.
        namemodule
processorsc                     t        |d      r|j                         ||  d<   |j                         D ]  \  }} |  d| ||        |S )Nget_processor
.processor.)hasattrr   named_children)r   r   r   sub_namechildfn_recursive_add_processorss        r]   r   z@UVit2DModel.attn_processors.<locals>.fn_recursive_add_processors   sd    v/282F2F2H
dV:./#)#8#8#: U%+tfAhZ,@%TU rm   )strrs   r   Moduler   r   r   )rZ   r   r   r   r   s       @r]   attn_processorszUVit2DModel.attn_processors   sm     
	c 	588?? 	X\]`bt]tXu 	 !//1 	BLD&'fjA	B rm   	processorc           	      T   t        | j                  j                               }t        |t              r,t        |      |k7  rt        dt        |       d| d| d      dt        dt        j                  j                  ffd| j                         D ]  \  }} |||        y)	a4  
        Sets the attention processor to use to compute attention.

        Parameters:
            processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
                The instantiated processor class or a dictionary of processor classes that will be set as the processor
                for **all** `Attention` layers.

                If `processor` is a dict, the key needs to define the path to the corresponding cross attention
                processor. This is strongly recommended when setting trainable attention processors.

        z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.r   r   c                     t        |d      rEt        |t              s|j                  |       n#|j                  |j	                  |  d             |j                         D ]  \  }} |  d| ||        y )Nset_processorr   r   )r   
isinstancedictr   popr   )r   r   r   r   r   fn_recursive_attn_processors        r]   r   zCUVit2DModel.set_attn_processor.<locals>.fn_recursive_attn_processor  sx    v/!)T2((3(($z7J)KL#)#8#8#: T%+tfAhZ,@%STrm   N)lenr   keysr   r   
ValueErrorr   rs   r   r   r   )rZ   r   countr   r   r   s        @r]   set_attn_processorzUVit2DModel.set_attn_processor   s     D((--/0i&3y>U+BPQTU^Q_P` a005w6QRWQXXkm 
	Tc 	T588?? 	T !//1 	ALD&'fi@	Arm   c           	      j   t        d | j                  j                         D              rt               }nmt        d | j                  j                         D              rt	               }n8t        dt        t        | j                  j                                            | j                  |       y)ze
        Disables custom attention processors and sets the default attention implementation.
        c              3   @   K   | ]  }|j                   t        v   y wri   )r\   r   .0procs     r]   	<genexpr>z9UVit2DModel.set_default_attn_processor.<locals>.<genexpr>  s     i4t~~!>>i   c              3   @   K   | ]  }|j                   t        v   y wri   )r\   r   r   s     r]   r   z9UVit2DModel.set_default_attn_processor.<locals>.<genexpr>  s     h$#==hr   zOCannot call `set_default_attn_processor` when attention processors are of type N)	allr   valuesr   r   r   nextiterr   )rZ   r   s     r]   set_default_attn_processorz&UVit2DModel.set_default_attn_processor  s     i4K_K_KfKfKhii,.Ih$J^J^JeJeJghh%Iabfgklp  mA  mA  mH  mH  mJ  hK  cL  bM  N  		*rm   )i   F              i   r   i@   i    r   r   r   FF         r   i   gư>T@   ri   )__name__
__module____qualname__ _supports_gradient_checkpointingr
   intboolfloatrF   r   propertyr   r   r   r   r   r   r   __classcell__r\   s   @r]   r   r   '   s   '+$   #!%($(#&!"% !!##%#&!% $&*?j, j, 	j,
 j, j,  #j, "j, !j, j, j, j,   !j," #j,$ %j,& 'j,( )j,, -j,. !/j,2 !3j,6 7j,: ;j,<  $=j,> ?j, j,X;z c+=&=!>  0 AE2Dd3PbKbFc2c,d  AF+rm   r   c                   $     e Zd Z fdZd Z xZS )rJ   c                     t         |           t        j                  ||      | _        t        |||      | _        t        j                  ||d|      | _        y Nrb   )kernel_sizer5   )	rE   rF   r   	Embedding
embeddingsr   
layer_normConv2dconv)rZ   r&   r'   r$   elementwise_affineepsr5   r\   s          r]   rF   zUVit2DConvEmbed.__init__"  sI    ,,z;?!+s4FGIIk+=1SWX	rm   c                     | j                  |      }| j                  |      }|j                  dddd      }| j                  |      }|S )Nr   r   rb   r   )r   r   rv   r   )rZ   rx   r   s      r]   r   zUVit2DConvEmbed.forward(  sH    __Y/
__Z0
''1a3
YYz*
rm   r   r   r   rF   r   r   r   s   @r]   rJ   rJ   !  s    Yrm   rJ   c                   2     e Zd Zdededef fdZd Z xZS )rM   r(   r)   r*   c                    t         |           |
rt        |ddddd|||	      | _        nd | _        t	        j
                  t        |      D cg c]  }t        ||||||       c}      | _        t	        j
                  t        |      D cg c]  }t        ||||z  |||	|||	       c}      | _
        |rt        |ddddd|||d	

      | _        y d | _        y c c}w c c}w )NTr   Conv2d_0r   rms_norm)use_convpaddingr   r   r=   r   r   r5   )r<   rD   r   F)	use_conv_transposer   r   r   r=   r   r   r5   interpolate)rE   rF   r   r)   r   rQ   rR   ConvNextBlock
res_blocksr   attention_blocksr   r*   )rZ   r}   r(   r   r   r1   r0   r   r+   r.   r)   r*   ir[   r\   s                 r]   rF   zUVitBlock.__init__1  s#    	*$"#8
DO #DO-- ~.
  ")"

 !# ~.  '#/%#+'/
!
" &#'$"#8!DM !DMY
s   CC"c                    | j                   | j                  |      }t        | j                  | j                        D ]v  \  }} |||      }|j                  \  }}}	}
|j                  |||	|
z        j                  ddd      } ||||      }|j                  ddd      j                  |||	|
      }x | j                  | j                  |      }|S )Nr   r   rb   )rf   rg   )r)   zipr   r   rr   viewrv   r*   )rZ   xre   rf   rg   	res_blockattention_blockr|   r}   r~   r   s              r]   r   zUVitBlock.forward  s    ??&"A*-doot?T?T*U 	M&I!_-A23''/J&%z8Ve^<DDQ1MA)>WmA 		!Q"''
HfeLA	M ==$a Arm   )r   r   r   r   r   rF   r   r   r   s   @r]   rM   rM   0  s0    L! L! L! L!\rm   rM   c                   (     e Zd Z	 d fd	Zd Z xZS )r   c                    t         |           t        j                  ||dd||      | _        t        |||      | _        t        j                  |t        ||z        |      | _	        t        j                         | _        t        t        ||z              | _        t        j                  t        ||z        ||      | _        t        j                  |      | _        t        j                  ||dz  |      | _        y )Nr   rb   )r   r   groupsr5   r4   r   )rE   rF   r   r   	depthwiser   normrG   r   channelwise_linear_1GELUchannelwise_actr   channelwise_normchannelwise_linear_2Dropoutchannelwise_dropoutcond_embeds_mapper)	rZ   r}   r0   r1   r   r   r   res_ffn_factorr\   s	           r]   rF   zConvNextBlock.__init__  s     	
 Hn6KL	$&IIhH~<U8V]e$f!!wwy 23x.7P3Q R$&IIc(^2K.Lh]e$f!#%::n#= "$))KAx"Prm   c                    |}| j                  |      }|j                  dddd      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }| j                  |      }| j                  |      }|j                  dddd      }||z   }| j                  t        j                  |            j                  dd      \  }}|d|d d d d d d f   z   z  |d d d d d d f   z   }|S )Nr   r   r   rb   rc   )r   rv   r   r   r   r   r   r   r   Fsiluchunk)rZ   r   cond_embedsx_resscaleshifts         r]   r   zConvNextBlock.forward  s   NN1IIaAq!IIaL%%a(  #!!!$%%a($$Q'IIaAq!I..qvvk/BCII!QRISuU1at+,,-aD$6F0GGrm   )   r   r   s   @r]   r   r     s    uvQ(rm   r   c                   >     e Zd Zdedededededef fdZd Z xZS )	rW   r'   r&   r   r1   r0   r%   c                     t         |           t        j                  ||d|      | _        t        |||      | _        t        j                  ||d|      | _        y r   )rE   rF   r   r   conv1r   r   conv2)rZ   r'   r&   r   r1   r0   r%   r\   s          r]   rF   zConvMlmLayer.__init__  sO     	YY1;AT\]
!+~?TUYY{MqxX
rm   c                     | j                  |      }| j                  |j                  dddd            j                  dddd      }| j                  |      }|S )Nr   r   r   rb   )r   r   rv   r   )rZ   r{   r   s      r]   r   zConvMlmLayer.forward  sW    

=1(=(=aAq(IJRRSTVWYZ\]^M*rm   )	r   r   r   r   r   r   rF   r   r   r   s   @r]   rW   rW     sN    YY Y 	Y
  $Y Y Yrm   rW   ))typingr   r   rs   torch.nn.functionalr   
functionalr   torch.utils.checkpointr   configuration_utilsr	   r
   loadersr   	attentionr   r   attention_processorr   r   r   r   r   r   r   r   modeling_utilsr   normalizationr   r   resnetr   r   r   r   rJ   rM   r   rW    rm   r]   <module>r     s          - B ' E  C ' 7 -w+*k+; w+tbii `		 `F*BII *Z299 rm   