
    bihP                     <   d dl mZ d dlmZmZmZ d dlZd dlZd dlmZ ddl	m
Z
mZ ddlmZmZ dd	lmZmZmZ dd
lmZmZ ddlmZ  ej0                  e      Ze G d de             Z G d dej8                        Z G d dee
      Z G d dej8                        Z G d dej8                        Z  G d dej8                        Z! G d dej8                        Z" G d dej8                        Z# G d dej8                        Z$ G d dej8                        Z%y)     )	dataclass)DictTupleUnionN)nn   )ConfigMixinregister_to_config)
BaseOutputlogging   )	AttentionAttentionProcessorAttnProcessor)TimestepEmbedding	Timesteps)
ModelMixinc                   0    e Zd ZU dZej
                  ed<   y)Kandinsky3UNetOutputNsample)__name__
__module____qualname__r   torchTensor__annotations__     a/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/unets/unet_kandinsky3.pyr   r       s    FELLr   r   c                   $     e Zd Z fdZd Z xZS )Kandinsky3EncoderProjc                     t         |           t        j                  ||d      | _        t        j
                  |      | _        y )NF)bias)super__init__r   Linearprojection_linear	LayerNormprojection_norm)selfencoder_hid_dimcross_attention_dim	__class__s      r   r%   zKandinsky3EncoderProj.__init__&   s7    !#?<OV[!\!||,?@r   c                 J    | j                  |      }| j                  |      }|S N)r'   r)   )r*   xs     r   forwardzKandinsky3EncoderProj.forward+   s'    ""1%  #r   r   r   r   r%   r1   __classcell__r-   s   @r   r!   r!   %   s    A
r   r!   c                        e Zd Ze	 	 	 	 	 	 	 	 ddededededeeee   f   dee   deeee   f   def fd	       Zed
e	e
ef   fd       Zdeee	e
ef   f   fdZd ZddZ xZS )Kandinsky3UNetin_channelstime_embedding_dimgroupsattention_head_dimlayers_per_blockblock_out_channelsr,   r+   c	                     t         |           d}	d}
d}d}|}|d   dz  }t        |dd      | _        t	        ||      | _        t        |||      | _        t        j                  ||dd	      | _
        t        ||      | _        |gt        |      z   }t        t        |d d
 |dd              }|D cg c]  }|r|nd 
 }}t        |      |gz  }|||g}t!        t"        |      }g }t        |      | _        t        j&                  g       | _        t+        t        |g|       D ]r  \  }\  \  }}}}}|| j$                  dz
  k7  }|j-                  || j$                  dz
  k7  r|nd       | j(                  j-                  t/        ||||||||	|
||             t t        j&                  g       | _        t+        t        t#        |      g|       D ]O  \  }\  \  }}}}}|dk7  }| j0                  j-                  t3        ||j5                         |||||||	|
||             Q t        j6                  ||      | _        t        j:                         | _        t        j                  ||dd	      | _        y c c}w )N   r   )FTTTr   F   )flip_sin_to_cosdownscale_freq_shiftr   kernel_sizepadding) r$   r%   r   	time_projr   time_embeddingKandinsky3AttentionPoolingadd_time_conditionr   Conv2dconv_inr!   encoder_hid_projlistziplenmapreversed
num_levels
ModuleListdown_blocks	enumerateappendKandinsky3DownSampleBlock	up_blocksKandinsky3UpSampleBlockpop	GroupNormconv_norm_outSiLUconv_act_outconv_out) r*   r7   r8   r9   r:   r;   r<   r,   r+   expansion_ratiocompression_ratioadd_cross_attentionadd_self_attentionout_channelsinit_channelshidden_dimsin_out_dimsis_exist	text_dims
num_blockslayer_paramsrev_layer_paramscat_dimslevelin_dimout_dimres_block_numtext_dimself_attentiondown_sample	up_sampler-   s                                   r   r%   zKandinsky3UNet.__init__2   s    	 76"*1-2"=%^_`/

 #= 35G#
 yymTUV 5oGZ [$o-?(@@3{3B/QRABObc8H($>c	c+,0@/AA
"I/ABx6k*==,S\+l+T
 	OEO%VW}h  DOOa$78KOOu11D'EG1M##)&!&#%"	* r*S\%9(89T
 	OEO%Wf}h 
INN!!'LLN&!&#%"		*  \\&-@GGI		-1VWXk ds   5I;returnc                     i }dt         dt        j                  j                  dt        t         t
        f   ffd| j                         D ]  \  }} |||        |S )z
        Returns:
            `dict` of attention processors: A dictionary containing all attention processors used in the model with
            indexed by its weight name.
        namemodule
processorsc                     t        |d      r|j                  ||  d<   |j                         D ]  \  }} |  d| ||        |S Nset_processorz
.processor.)hasattr	processornamed_children)rx   ry   rz   sub_namechildfn_recursive_add_processorss        r   r   zCKandinsky3UNet.attn_processors.<locals>.fn_recursive_add_processors   sa    v/282B2B
dV:./#)#8#8#: U%+tfAhZ,@%TU r   )strr   r   Moduler   r   r   )r*   rz   rx   ry   r   s       @r   attn_processorszKandinsky3UNet.attn_processors   sm     
	c 	588?? 	X\]`bt]tXu 	 !//1 	BLD&'fjA	B r   r   c           	      T   t        | j                  j                               }t        |t              r,t        |      |k7  rt        dt        |       d| d| d      dt        dt        j                  j                  ffd| j                         D ]  \  }} |||        y)	a4  
        Sets the attention processor to use to compute attention.

        Parameters:
            processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
                The instantiated processor class or a dictionary of processor classes that will be set as the processor
                for **all** `Attention` layers.

                If `processor` is a dict, the key needs to define the path to the corresponding cross attention
                processor. This is strongly recommended when setting trainable attention processors.

        z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.rx   ry   c                     t        |d      rEt        |t              s|j                  |       n#|j                  |j	                  |  d             |j                         D ]  \  }} |  d| ||        y r|   )r   
isinstancedictr}   rZ   r   )rx   ry   r   r   r   fn_recursive_attn_processors        r   r   zFKandinsky3UNet.set_attn_processor.<locals>.fn_recursive_attn_processor   sx    v/!)T2((3(($z7J)KL#)#8#8#: T%+tfAhZ,@%STr   N)rO   r   keysr   r   
ValueErrorr   r   r   r   r   )r*   r   countrx   ry   r   s        @r   set_attn_processorz!Kandinsky3UNet.set_attn_processor   s     D((--/0i&3y>U+BPQTU^Q_P` a005w6QRWQXXkm 
	Tc 	T588?? 	T !//1 	ALD&'fi@	Ar   c                 6    | j                  t                      y)ze
        Disables custom attention processors and sets the default attention implementation.
        N)r   r   )r*   s    r   set_default_attn_processorz)Kandinsky3UNet.set_default_attn_processor   s     	0r   c                    |2d|j                  |j                        z
  dz  }|j                  d      }t        j                  |      sTt        |t              rt        j                  nt        j                  }t        j                  |g||j                        }n6t        |j                        dk(  r|d    j                  |j                        }|j                  |j                  d         }| j                  |      j                  |j                        }| j                  |      }| j!                  |      }|| j#                  |||      }g }	| j%                  |      }t'        | j(                        D ]4  \  }
} |||||      }|
| j*                  dz
  k7  s$|	j-                  |       6 t'        | j.                        D ]<  \  }
}|
dk7  r't        j0                  ||	j3                         gd      } |||||      }> | j5                  |      }| j7                  |      }| j9                  |      }|s|fS t;        |      S )Nr?   g     )dtypedevicer   dim)r   )tor   	unsqueezer   	is_tensorr   floatfloat32int32tensorr   rO   shapeexpandrF   rG   rL   rI   rK   rU   rT   rR   rV   rX   catrZ   r\   r^   r_   r   )r*   r   timestepencoder_hidden_statesencoder_attention_maskreturn_dictr   time_embed_input
time_embedhidden_statesrn   rt   ru   s                r   r1   zKandinsky3UNet.forward   s   !-&'*@*C*CFLL*Q&QU]%]"%;%E%Ea%H"x(%/%%@EMMekkE||XJeFMMRH A%~((7H ??6<<?3>>(366v||D(()9:
 $ 5 56K L ,00=RTjkJf%"+D,<,<"= 	-E; 5JLbcF!++$$V,	-
 !*$.. 9 	bE9zFM,=,=,?#@aHvz3HJ`aF	b
 ##F+""6*v&9#622r   )r>          @   r   )i  i   r   i      r   )NNT)r   r   r   r
   intr   r   r%   propertyr   r   r   r   r   r   r1   r3   r4   s   @r   r6   r6   1   s    "&"$34)?6:#[Y[Y  [Y 	[Y
  [Y  U3Z0[Y "#J[Y #3c
?3[Y [Y [Yz c+=&=!>  . AE2Dd3PbKbFc2c,d  AD1'3r   r6   c                   8     e Zd Z	 	 	 	 	 	 	 	 d fd	ZddZ xZS )rY   c                    t         |           d |rdnd d d ggd gdz  g|dz
  z  z   }||z   |fg||fg|dz
  z  z   ||fgz   }g }g }g }|| _        || _        |r |j	                  t        ||d |||	             n#|j	                  t        j                                t        ||      D ]  \  \  }}}|j	                  t        |||||
|             | |j	                  t        ||||||	             n#|j	                  t        j                                |j	                  t        |||||
              t        j                  |      | _        t        j                  |      | _        t        j                  |      | _        y )NTr>   r?   r   r$   r%   rs   context_dimrV   Kandinsky3AttentionBlockr   IdentityrN   Kandinsky3ResNetBlockrS   
attentions
resnets_inresnets_out)r*   r7   cat_dimrd   time_embed_dimr   rj   r9   head_dimr`   ra   ru   rs   up_resolutionshidden_channelsr   r   r   
in_channelout_channelup_resolutionr-   s                        r   r%   z Kandinsky3UpSampleBlock.__init__   s    	dDIJtfWXj\]gjk]kMllG#[12[)*j1n=>\*+, 	
 

,&(~tVU]_no bkkm,8;O^8\ 	4%Z}%j*nfVgivw &!!,"NKSb !!"++-0%j+~vWhi	$ --
3--
3==5r   c                    t        | j                  dd  | j                  | j                        D ]0  \  }}} |||      }| j                   ||||||      } |||      }2 | j
                  r | j                  d   |||      }|S )Nr?   r   
image_mask)rN   r   r   r   r   rs   	r*   r0   r   contextcontext_maskr   	attention	resnet_in
resnet_outs	            r   r1   zKandinsky3UpSampleBlock.forward5  s    03DOOAB4GZ^ZjZj0k 	*,Iy*!Z(A+aWlJO1j)A		* ""1jZHAr   Nr   r   r   r>   r   TTNNNr2   r4   s   @r   rY   rY      s(     86t	r   rY   c                   8     e Zd Z	 	 	 	 	 	 	 	 d fd	ZddZ xZS )rW   c                    t         |           g }g }g }|| _        || _        |r |j	                  t        ||d |||             n#|j	                  t        j                                d gdz  g|dz
  z  d d |
rdnd d ggz   }||fg||fg|dz
  z  z   }t        ||      D ]  \  \  }}}|j	                  t        |||||	             | |j	                  t        ||||||             n#|j	                  t        j                                |j	                  t        |||||	|              t        j                  |      | _        t        j                  |      | _        t        j                  |      | _        y )Nr>   r?   Fr   )r*   r7   rd   r   r   rj   r9   r   r`   ra   rt   rs   r   r   r   r   r   r   r   r   r-   s                       r   r%   z"Kandinsky3DownSampleBlock.__init__B  s    	

,&(ndFT\^mn bkkm,&1*a8T4R]cgim<n;oo'67L,;W:X\fij\j:kk8;O^8\ 	4%Z}%j+~vWhi &!!,#^[&(Tc !!"++-0%nfFWYf	( --
3--
3==5r   c                    | j                   r | j                  d   |||      }t        | j                  dd  | j                  | j                        D ]0  \  }}} |||      }| j
                   ||||||      } |||      }2 |S )Nr   r   r?   )rs   r   rN   r   r   r   r   s	            r   r1   z!Kandinsky3DownSampleBlock.forwardy  s    ""1jZHA03DOOAB4GZ^ZjZj0k 	*,Iy*!Z(A+aWlJO1j)A		*
 r   r   r   r2   r4   s   @r   rW   rW   A  s(     56n	r   rW   c                   $     e Zd Z fdZd Z xZS )Kandinsky3ConditionalGroupNormc                    t         |           t        j                  ||d      | _        t        j
                  t        j                         t        j                  |d|z              | _        | j                  d   j                  j                  j                          | j                  d   j                  j                  j                          y )NF)affiner   r?   )r$   r%   r   r[   norm
Sequentialr]   r&   context_mlpweightdatazero_r#   )r*   r9   normalized_shaper   r-   s       r   r%   z'Kandinsky3ConditionalGroupNorm.__init__  s    LL)9%H	==BIIk1O_K_4`a""''--/  %%++-r   c                     | j                  |      }t        t        |j                  dd              D ]  }|j	                  d      } |j                  dd      \  }}| j                  |      |dz   z  |z   }|S )Nr   rE   r?   r   g      ?)r   rangerO   r   r   chunkr   )r*   r0   r   _scaleshifts         r   r1   z&Kandinsky3ConditionalGroupNorm.forward  s}    ""7+s17712;'( 	,A''+G	, }}QA}.uIIaLECK(50r   r2   r4   s   @r   r   r     s    .r   r   c                   &     e Zd Zd fd	Zd Z xZS )Kandinsky3Blockc                    t         |           t        |||      | _        t	        j
                         | _        |!|rt	        j                  ||dd      | _        nt	        j                         | _        t        |dkD        }t	        j                  ||||      | _        |!|st	        j                  ||dd      | _        y t	        j                         | _        y )Nr   rC   strider?   rB   )r$   r%   r   
group_normr   r]   
activationConvTranspose2dru   r   r   rJ   
projectionrt   )	r*   r7   rd   r   rC   norm_groupsr   rD   r-   s	           r   r%   zKandinsky3Block.__init__  s    8kSab'')$//[VW`abDN[[]DNkAo&))K;`gh$]!yy|QR[\]D!{{}Dr   c                     | j                  ||      }| j                  |      }| j                  |      }| j                  |      }| j	                  |      }|S r/   )r   r   ru   r   rt   )r*   r0   r   s      r   r1   zKandinsky3Block.forward  sQ    OOAz*OOANN1OOAQr   )r   r   Nr2   r4   s   @r   r   r     s    -"r   r   c                   4     e Zd Zddddgz  f fd	Zd Z xZS )r   r   r   r>   Nc                 l   t         |           g d}t        ||      |z  }||fg||fgdz  z   ||fgz   }	t        j                  t        |	||      D 
cg c]  \  \  }
}}}t        |
|||||       c}}}}
      | _        d|v rt        j                  ||dd      nt        j                         | _
        ||k7  rt        j                  ||d      nt        j                         | _        d|v rt        j                  ||dd      | _        y t        j                         | _        y c c}}}}
w )N)r?   r   r   r?   r   Tr   r?   )rC   F)r$   r%   maxr   rS   rN   r   resnet_blocksr   r   shortcut_up_samplerJ   shortcut_projectionshortcut_down_sample)r*   r7   rd   r   r   ra   r   kernel_sizeshidden_channelr   r   r   rC   r   r-   s                 r   r%   zKandinsky3ResNetBlock.__init__  sS    	#[,7;LL>*+/O.PST.TTYgiuXvWww 	  ]] NQ#\>N I-Z{M  
KVacpq
 ~% {KQqQ 	 DOR^C^BIIk<Q?dfdododq 	 
 & IIlLaJ 	!  	!s   D.c                     |}| j                   D ]  } |||      } | j                  |      }| j                  |      }| j                  |      }||z   }|S r/   )r   r   r   r   )r*   r0   r   outresnet_blocks        r   r1   zKandinsky3ResNetBlock.forward  sh     .. 	0LsJ/C	0 ##A&$$Q'%%a(Gr   r2   r4   s   @r   r   r     s    EG[\mnrvqwmw
>	r   r   c                   (     e Zd Zd fd	ZddZ xZS )rH   c                 L    t         |           t        ||||d      | _        y )NFdim_headrp   out_bias)r$   r%   r   r   )r*   num_channelsr   r   r-   s       r   r%   z#Kandinsky3AttentionPooling.__init__  s)    " 
r   c                     |j                  |j                        }| j                  |j                  dd      ||      }||j	                  d      z   S )Nr   r?   T)r   keepdim)r   r   r   meansqueeze)r*   r0   r   r   s       r   r1   z"Kandinsky3AttentionPooling.forward  sI    #W]];..!T!BG\Z7??1%%%r   )r   r/   r2   r4   s   @r   rH   rH     s    
&r   rH   c                   (     e Zd Zd fd	ZddZ xZS )r   c           
      ^   t         |           t        |||      | _        t	        ||xs |||d      | _        ||z  }t        |||      | _        t        j                  t        j                  ||dd      t        j                         t        j                  ||dd            | _        y )NFr   r?   )rC   r#   )r$   r%   r   in_normr   r   out_normr   r   rJ   r]   feed_forward)	r*   r   r   r   r   r   r`   r   r-   s	           r   r%   z!Kandinsky3AttentionBlock.__init__  s    5k<Q_`"'< 
 *L86{LR`aMMIIlOOGGIIIo|O
r   c                     |j                   dd  \  }}| j                  ||      }|j                  |j                   d   d||z        j                  ddd      }||n|}||j	                  |j
                        }| j                  |||      }|j                  ddd      j                  d      j                  |j                   d   d||      }||z   }| j                  ||      }| j                  |      }||z   }|S )Nr   rE   r   r?   r   )
r   r  reshapepermuter   r   r   r   r  r  )	r*   r0   r   r   r   r   heightwidthr   s	            r   r1   z Kandinsky3AttentionBlock.forward  s    ll1j)kk!''!*b&5.9AA!QJ$0'c#'????LnnS'<8kk!Q",,R0881r6SXYGmmAz*$Gr   )Nr   r   r>   r   r2   r4   s   @r   r   r     s    
&r   r   )&dataclassesr   typingr   r   r   r   torch.utils.checkpointr   configuration_utilsr	   r
   utilsr   r   attention_processorr   r   r   
embeddingsr   r   modeling_utilsr   
get_loggerr   loggerr   r   r!   r6   rY   rW   r   r   r   rH   r   r   r   r   <module>r     s    " % %    B ( N N 5 ' 
		H	%  :    	BII 	F3Z F3RDbii DNA		 AHRYY &bii 6)BII )X& &"#ryy #r   