from dataclasses import dataclass
from typing import Dict, Optional, Union

import torch
from torch import nn

from ...configuration_utils import ConfigMixin, register_to_config
from ...utils import BaseOutput, logging
from ..attention_processor import AttentionProcessor
from ..embeddings import (
    HunyuanCombinedTimestepTextSizeStyleEmbedding,
    PatchEmbed,
    PixArtAlphaTextProjection,
)
from ..modeling_utils import ModelMixin
from ..transformers.hunyuan_transformer_2d import HunyuanDiTBlock
from .controlnet import Tuple, zero_module


logger = logging.get_logger(__name__)


@dataclass
class HunyuanControlNetOutput(BaseOutput):
    controlnet_block_samples: Tuple[torch.Tensor]


class HunyuanDiT2DControlNetModel(ModelMixin, ConfigMixin):
    @register_to_config
    def __init__(
        self,
        conditioning_channels: int = 3,
        num_attention_heads: int = 16,
        attention_head_dim: int = 88,
        in_channels: Optional[int] = None,
        patch_size: Optional[int] = None,
        activation_fn: str = "gelu-approximate",
        sample_size=32,
        hidden_size=1152,
        transformer_num_layers: int = 40,
        mlp_ratio: float = 4.0,
        cross_attention_dim: int = 1024,
        cross_attention_dim_t5: int = 2048,
        pooled_projection_dim: int = 1024,
        text_len: int = 77,
        text_len_t5: int = 256,
        use_style_cond_and_image_meta_size: bool = True,
    ):
        super().__init__()
        self.num_heads = num_attention_heads
        self.inner_dim = num_attention_heads * attention_head_dim

        # Projects the T5 text states down to the cross-attention width shared with the CLIP states.
        self.text_embedder = PixArtAlphaTextProjection(
            in_features=cross_attention_dim_t5,
            hidden_size=cross_attention_dim_t5 * 4,
            out_features=cross_attention_dim,
            act_fn="silu_fp32",
        )

        # Learned padding used where the text-embedding attention mask is zero.
        self.text_embedding_padding = nn.Parameter(
            torch.randn(text_len + text_len_t5, cross_attention_dim, dtype=torch.float32)
        )

        self.pos_embed = PatchEmbed(
            height=sample_size,
            width=sample_size,
            in_channels=in_channels,
            embed_dim=hidden_size,
            patch_size=patch_size,
            pos_embed_type=None,
        )

        self.time_extra_emb = HunyuanCombinedTimestepTextSizeStyleEmbedding(
            hidden_size,
            pooled_projection_dim=pooled_projection_dim,
            seq_len=text_len_t5,
            cross_attention_dim=cross_attention_dim_t5,
            use_style_cond_and_image_meta_size=use_style_cond_and_image_meta_size,
        )

        # Zero-initialized per-block output projections (filled in below).
        self.controlnet_blocks = nn.ModuleList([])

        # HunyuanDiT blocks: the ControlNet keeps `transformer_num_layers // 2 - 1` of them, without the
        # long skip connections used in the second half of the full transformer.
        self.blocks = nn.ModuleList(
            [
                HunyuanDiTBlock(
                    dim=self.inner_dim,
                    num_attention_heads=self.config.num_attention_heads,
                    activation_fn=activation_fn,
                    ff_inner_dim=int(self.inner_dim * mlp_ratio),
                    cross_attention_dim=cross_attention_dim,
                    qk_norm=True,
                    skip=False,
                )
                for layer in range(transformer_num_layers // 2 - 1)
            ]
        )

        # Zero-initialized projections so an untrained ControlNet contributes nothing to the transformer.
        self.input_block = zero_module(nn.Linear(hidden_size, hidden_size))
        for _ in range(len(self.blocks)):
            controlnet_block = nn.Linear(hidden_size, hidden_size)
            controlnet_block = zero_module(controlnet_block)
            self.controlnet_blocks.append(controlnet_block)

    @property
    def attn_processors(self) -> Dict[str, AttentionProcessor]:
        r"""
        Returns:
            `dict` of attention processors: A dictionary containing all attention processors used in the model,
            indexed by their weight names.
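
        Example (illustrative): keys follow the module paths, e.g. `"blocks.0.attn1.processor"`, and the returned
        mapping can be edited and passed back through [`~HunyuanDiT2DControlNetModel.set_attn_processor`].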
        """
        # set recursively
        processors = {}

        def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
            if hasattr(module, "get_processor"):
                processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)

            for sub_name, child in module.named_children():
                fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)

            return processors

        for name, module in self.named_children():
            fn_recursive_add_processors(name, module, processors)

        return processors

    def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
        r"""
        Sets the attention processor to use to compute attention.

        Parameters:
            processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
                The instantiated processor class or a dictionary of processor classes that will be set as the processor
                for **all** `Attention` layers. If `processor` is a dict, the key needs to define the path to the
                corresponding cross attention processor. This is strongly recommended when setting trainable attention
                processors.
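
        Example (an illustrative sketch; it assumes the stock `AttnProcessor2_0` class from
        `diffusers.models.attention_processor` is the processor you want on every attention layer):

        ```py
        from diffusers.models.attention_processor import AttnProcessor2_0

        # a single instance is broadcast to all `Attention` layers
        controlnet.set_attn_processor(AttnProcessor2_0())
        ```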
        """
        count = len(self.attn_processors.keys())

        if isinstance(processor, dict) and len(processor) != count:
            raise ValueError(
                f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
                f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
            )

        def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
            if hasattr(module, "set_processor"):
                if not isinstance(processor, dict):
                    module.set_processor(processor)
                else:
                    module.set_processor(processor.pop(f"{name}.processor"))

            for sub_name, child in module.named_children():
                fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)

        for name, module in self.named_children():
            fn_recursive_attn_processor(name, module, processor)

    @classmethod
    def from_transformer(
        cls, transformer, conditioning_channels=3, transformer_num_layers=None, load_weights_from_transformer=True
    ):
        config = transformer.config
        activation_fn = config.activation_fn
        attention_head_dim = config.attention_head_dim
        cross_attention_dim = config.cross_attention_dim
        cross_attention_dim_t5 = config.cross_attention_dim_t5
        hidden_size = config.hidden_size
        in_channels = config.in_channels
        mlp_ratio = config.mlp_ratio
        num_attention_heads = config.num_attention_heads
        patch_size = config.patch_size
        sample_size = config.sample_size
        text_len = config.text_len
        text_len_t5 = config.text_len_t5

        transformer_num_layers = transformer_num_layers or config.transformer_num_layers

        controlnet = cls(
            conditioning_channels=conditioning_channels,
            transformer_num_layers=transformer_num_layers,
            activation_fn=activation_fn,
            attention_head_dim=attention_head_dim,
            cross_attention_dim=cross_attention_dim,
            cross_attention_dim_t5=cross_attention_dim_t5,
            hidden_size=hidden_size,
            in_channels=in_channels,
            mlp_ratio=mlp_ratio,
            num_attention_heads=num_attention_heads,
            patch_size=patch_size,
            sample_size=sample_size,
            text_len=text_len,
            text_len_t5=text_len_t5,
        )
        if load_weights_from_transformer:
            key = controlnet.load_state_dict(transformer.state_dict(), strict=False)
            logger.warning(f"controlnet load from Hunyuan-DiT. missing_keys: {key[0]}")
        return controlnet

    def forward(
        self,
        hidden_states,
        timestep,
        controlnet_cond: torch.Tensor,
        conditioning_scale: float = 1.0,
        encoder_hidden_states=None,
        text_embedding_mask=None,
        encoder_hidden_states_t5=None,
        text_embedding_mask_t5=None,
        image_meta_size=None,
        style=None,
        image_rotary_emb=None,
        return_dict=True,
    ):
        """
        The [`HunyuanDiT2DControlNetModel`] forward method.

        Args:
        hidden_states (`torch.Tensor` of shape `(batch size, dim, height, width)`):
            The input tensor.
        timestep ( `torch.LongTensor`, *optional*):
            Used to indicate the denoising step.
        controlnet_cond ( `torch.Tensor` ):
            The conditioning input to ControlNet.
        conditioning_scale ( `float` ):
            The scale factor applied to the ControlNet block residuals before they are returned.
        encoder_hidden_states ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*):
            Conditional embeddings for cross attention layer. This is the output of `BertModel`.
        text_embedding_mask: torch.Tensor
            An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output
            of `BertModel`.
        encoder_hidden_states_t5 ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*):
            Conditional embeddings for cross attention layer. This is the output of T5 Text Encoder.
        text_embedding_mask_t5: torch.Tensor
            An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output
            of T5 Text Encoder.
        image_meta_size (torch.Tensor):
            Conditional embedding indicating the image sizes.
        style (torch.Tensor):
            Conditional embedding indicating the style.
        image_rotary_emb (`torch.Tensor`):
            The image rotary embeddings to apply on query and key tensors during attention calculation.
        return_dict: bool
            Whether to return a dictionary.
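
        Returns:
            If `return_dict` is `True`, a [`HunyuanControlNetOutput`] whose `controlnet_block_samples` holds one
            residual tensor per kept `HunyuanDiTBlock`, already scaled by `conditioning_scale`; otherwise a plain
            `tuple` wrapping that same sequence.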
        """
        height, width = hidden_states.shape[-2:]

        hidden_states = self.pos_embed(hidden_states)  # (b, c, H, W) -> (b, N, C)

        # Embed the conditioning image the same way and add it through the zero-initialized input projection.
        hidden_states = hidden_states + self.input_block(self.pos_embed(controlnet_cond))

        temb = self.time_extra_emb(
            timestep, encoder_hidden_states_t5, image_meta_size, style, hidden_dtype=timestep.dtype
        )  # (B, D)

        # Project the T5 states, concatenate the CLIP and T5 streams, and pad masked positions.
        batch_size, sequence_length, _ = encoder_hidden_states_t5.shape
        encoder_hidden_states_t5 = self.text_embedder(
            encoder_hidden_states_t5.view(-1, encoder_hidden_states_t5.shape[-1])
        )
        encoder_hidden_states_t5 = encoder_hidden_states_t5.view(batch_size, sequence_length, -1)

        encoder_hidden_states = torch.cat([encoder_hidden_states, encoder_hidden_states_t5], dim=1)
        text_embedding_mask = torch.cat([text_embedding_mask, text_embedding_mask_t5], dim=-1)
        text_embedding_mask = text_embedding_mask.unsqueeze(2).bool()

        encoder_hidden_states = torch.where(text_embedding_mask, encoder_hidden_states, self.text_embedding_padding)

        block_res_samples = ()
        for layer, block in enumerate(self.blocks):
            hidden_states = block(
                hidden_states,
                temb=temb,
                encoder_hidden_states=encoder_hidden_states,
                image_rotary_emb=image_rotary_emb,
            )  # (N, L, D)

            block_res_samples = block_res_samples + (hidden_states,)

        # Run every block output through its zero-initialized projection, then scale the residuals.
        controlnet_block_res_samples = ()
        for block_res_sample, controlnet_block in zip(block_res_samples, self.controlnet_blocks):
            block_res_sample = controlnet_block(block_res_sample)
            controlnet_block_res_samples = controlnet_block_res_samples + (block_res_sample,)

        controlnet_block_res_samples = [sample * conditioning_scale for sample in controlnet_block_res_samples]

        if not return_dict:
            return (controlnet_block_res_samples,)

        return HunyuanControlNetOutput(controlnet_block_samples=controlnet_block_res_samples)


class HunyuanDiT2DMultiControlNetModel(ModelMixin):
    r"""
    `HunyuanDiT2DMultiControlNetModel` wrapper class for Multi-HunyuanDiT2DControlNetModel

    This module is a wrapper for multiple instances of the `HunyuanDiT2DControlNetModel`. The `forward()` API is
    designed to be compatible with `HunyuanDiT2DControlNetModel`.

    Args:
        controlnets (`List[HunyuanDiT2DControlNetModel]`):
            Provides additional conditioning to the transformer during the denoising process. You must set multiple
            `HunyuanDiT2DControlNetModel` as a list.
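
    Example (an illustrative sketch; `controlnet_pose` and `controlnet_depth` stand in for any two already
    constructed [`HunyuanDiT2DControlNetModel`] instances):

    ```py
    multi_controlnet = HunyuanDiT2DMultiControlNetModel([controlnet_pose, controlnet_depth])
    # `controlnet_cond` and `conditioning_scale` must then be sequences with one entry per ControlNet.
    ```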
    """

    def __init__(self, controlnets):
        super().__init__()
        self.nets = nn.ModuleList(controlnets)

    def forward(
        self,
        hidden_states,
        timestep,
        controlnet_cond: torch.Tensor,
        conditioning_scale: float = 1.0,
        encoder_hidden_states=None,
        text_embedding_mask=None,
        encoder_hidden_states_t5=None,
        text_embedding_mask_t5=None,
        image_meta_size=None,
        style=None,
        image_rotary_emb=None,
        return_dict=True,
    ):
        """
        The [`HunyuanDiT2DMultiControlNetModel`] forward method. The arguments mirror
        [`HunyuanDiT2DControlNetModel.forward`], except that `controlnet_cond` and `conditioning_scale` are
        sequences holding one entry per ControlNet in `self.nets`.
        """
        for i, (image, scale, controlnet) in enumerate(zip(controlnet_cond, conditioning_scale, self.nets)):
            block_samples = controlnet(
                hidden_states=hidden_states,
                timestep=timestep,
                controlnet_cond=image,
                conditioning_scale=scale,
                encoder_hidden_states=encoder_hidden_states,
                text_embedding_mask=text_embedding_mask,
                encoder_hidden_states_t5=encoder_hidden_states_t5,
                text_embedding_mask_t5=text_embedding_mask_t5,
                image_meta_size=image_meta_size,
                style=style,
                image_rotary_emb=image_rotary_emb,
                return_dict=return_dict,
            )

            # merge the per-block residuals produced by each ControlNet
            if i == 0:
                control_block_samples = block_samples
            else:
                control_block_samples = [
                    control_block_sample + block_sample
                    for control_block_sample, block_sample in zip(control_block_samples[0], block_samples[0])
                ]
                control_block_samples = (control_block_samples,)

        return control_block_samples
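

# Usage sketch (illustrative only): building a zero-initialized ControlNet from an existing HunyuanDiT
# transformer via `from_transformer`. The repository id below is a placeholder for any HunyuanDiT checkpoint
# stored in the diffusers layout; substitute the one you actually use.
#
#     from diffusers import HunyuanDiT2DModel
#
#     transformer = HunyuanDiT2DModel.from_pretrained("<hunyuan-dit-repo>", subfolder="transformer")
#     controlnet = HunyuanDiT2DControlNetModel.from_transformer(
#         transformer, conditioning_channels=3, load_weights_from_transformer=True
#     )
#
# During denoising, `controlnet(...).controlnet_block_samples` is the sequence of residuals that the main
# HunyuanDiT transformer consumes alongside its own hidden states.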