
    bi                     L    d dl Z d dl mZ ddlmZmZ ddlmZ  G d dee      Zy)    N)nn   )ConfigMixinregister_to_config)
ModelMixinc                   H     e Zd ZdZeddddededef fd       Zd	 Z xZS )
UnCLIPTextProjModelz
    Utility class for CLIP embeddings. Used to combine the image and text embeddings into a format usable by the
    decoder.

    For more details, see the original paper: https://huggingface.co/papers/2204.06125 section 2.1
       i   )clip_extra_context_tokensclip_embeddings_dimr   r   time_embed_dimc                   t         |           t        j                  t	        j
                  |            | _        t        j                  ||      | _        t        j                  ||      | _	        || _
        t        j                  || j                  |z        | _        t        j                  ||      | _        t        j                  |      | _        y )N)super__init__r   	Parametertorchzeros+learned_classifier_free_guidance_embeddingsLinearembedding_proj0clip_image_embeddings_project_to_time_embeddingsr   clip_extra_context_tokens_projencoder_hidden_states_proj	LayerNormtext_encoder_hidden_states_norm)selfr   r   r   cross_attention_dim	__class__s        _/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/pipelines/unclip/text_proj.pyr   zUnCLIPTextProjModel.__init__   s     	;=<<TgHh;i8 !ii(;^L@B		J]_m@n= *C&.0ii!?!?BU!U/
+ +-))4GI\*]'/1||<O/P,    c                ^   |rU|j                   d   }| j                  j                  d      }|j                  |d      }t	        j
                  ||gd      }|j                   d   |j                   d   k(  sJ |j                   d   }| j                  |      }| j                  |      }	|	|z   }
| j                  |      }|j                  |d| j                        }|j                  ddd      }| j                  |      }| j                  |      }t	        j
                  ||gd      }||
fS )Nr   )dim      )shaper   	unsqueezeexpandr   catr   r   r   reshaper   permuter   r   )r   image_embeddingsprompt_embedstext_encoder_hidden_statesdo_classifier_free_guidanceimage_embeddings_batch_size#classifier_free_guidance_embeddings
batch_sizetime_projected_prompt_embedstime_projected_image_embeddingsadditive_clip_time_embeddingsr   s               r   forwardzUnCLIPTextProjModel.forward7   sU   &*:*@*@*C'262b2b2l2lmn2o/2U2\2\+R3/  %yy*MO_)`fgh  %%a(M,?,?,BBBB"((+
 (,':':='I$*.*_*_`p*q'(GJf(f% %)$G$GHX$Y!$=$E$EjRTVZVtVt$u!$=$E$EaA$N!%)%D%DE_%`"%)%I%IJd%e"%*YY0IKe/flm%n")+HHHr    )	__name__
__module____qualname____doc__r   intr   r6   __classcell__)r   s   @r   r	   r	      sM      *+#&	Q $'Q !	Q
 Q Q0Ir    r	   )r   r   configuration_utilsr   r   modelsr   r	    r    r   <module>r@      s%      B  @I*k @Ir    