
    biY                     H   d dl mZ d dlmZmZmZmZmZmZ d dl	Z	d dl
mZ ddlmZmZ ddlmZ ddlmZmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z&  ejN                  e(      Z)e G d de             Z* G d de#ee      Z+ G d de#      Z,y)    )	dataclass)AnyDictListOptionalTupleUnionN   )ConfigMixinregister_to_config)PeftAdapterMixin)USE_PEFT_BACKEND
BaseOutputloggingscale_lora_layersunscale_lora_layers   )AttentionProcessor)ControlNetConditioningEmbeddingzero_module)*CombinedTimestepGuidanceTextProjEmbeddings"CombinedTimestepTextProjEmbeddingsFluxPosEmbed)Transformer2DModelOutput)
ModelMixin)FluxSingleTransformerBlockFluxTransformerBlockc                   V    e Zd ZU eej
                     ed<   eej
                     ed<   y)FluxControlNetOutputcontrolnet_block_samplescontrolnet_single_block_samplesN)__name__
__module____qualname__r   torchTensor__annotations__     g/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_flux.pyr   r   #   s     #ELL11%*5<<%88r)   r   c                       e Zd ZdZedddddddd	d
g dddfdedededededededededee   dedef fd       Ze	d        Z
d Ze	 	 	 	 	 d+dedededefd       Z	 	 	 	 	 	 	 	 	 	 d,dej                  dej                  dej                  d ed!ej                  d"ej                  d#ej"                  d$ej                  d%ej                  d&ej                  d'eeeef      d(ed)eej.                  ef   fd*Z xZS )-FluxControlNetModelT   @      &         i   i   F)   8   r4   N
patch_sizein_channels
num_layersnum_single_layersattention_head_dimnum_attention_headsjoint_attention_dimpooled_projection_dimguidance_embedsaxes_dims_ropenum_modeconditioning_embedding_channelsc           
      H   t         |           || _        ||z  | _        t	        d|
      | _        |	rt        nt        } || j                  |      | _        t        j                  || j                        | _        t        j                  j                  || j                        | _        t        j                  t        |      D cg c]  }t!        | j                  ||       c}      | _        t        j                  t        |      D cg c]  }t%        | j                  ||       c}      | _        t        j                  g       | _        t        t+        | j"                              D ]N  }| j(                  j-                  t/        t        j                  | j                  | j                                     P t        j                  g       | _        t        t+        | j&                              D ]N  }| j0                  j-                  t/        t        j                  | j                  | j                                     P |d u| _        | j2                  r%t        j4                  || j                        | _        |It9        |d      | _        t        j                  j                  || j                        | _        d| _        y d | _        t/        t        j                  j                  || j                              | _        d| _        y c c}w c c}w )Ni'  )thetaaxes_dim)embedding_dimr<   )dimr:   r9   )r3   r3   r3   r3   )r@   block_out_channelsF) super__init__out_channels	inner_dimr   	pos_embedr   r   time_text_embednnLinearcontext_embedderr%   
x_embedder
ModuleListranger   transformer_blocksr   single_transformer_blockscontrolnet_blockslenappendr   controlnet_single_blocksunion	Embeddingcontrolnet_mode_embedderr   input_hint_blockcontrolnet_x_embeddergradient_checkpointing)selfr5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   text_time_guidance_clsi_	__class__s                   r*   rH   zFluxControlNetModel.__init__,   s     	',/AA%ENK:I6Oq 	  6..@U 
 !#		*=t~~ N((//+t~~F"$-- z*  %(;'9	#
 *, 01  +(;'9	*
& "$r!2s42234 	bA""))+biiPTP^P^6_*`a	b )+b(9%s499:; 	iA))00RYYt~~W[WeWe=f1gh	i T)
::,.LL4>>,RD)*6$C0Odt%D! */dnn)UD&
 ',# %)D!)4UXX__[RVR`R`5a)bD&&+#Ws   
LLc                     i }dt         dt        j                  j                  dt        t         t
        f   ffd| j                         D ]  \  }} |||        |S )z
        Returns:
            `dict` of attention processors: A dictionary containing all attention processors used in the model with
            indexed by its weight name.
        namemodule
processorsc                     t        |d      r|j                         ||  d<   |j                         D ]  \  }} |  d| ||        |S )Nget_processor
.processor.)hasattrri   named_children)re   rf   rg   sub_namechildfn_recursive_add_processorss        r*   rp   zHFluxControlNetModel.attn_processors.<locals>.fn_recursive_add_processors   sd    v/282F2F2H
dV:./#)#8#8#: U%+tfAhZ,@%TU r)   )strr%   rM   Moduler   r   rm   )r_   rg   re   rf   rp   s       @r*   attn_processorsz#FluxControlNetModel.attn_processorsy   sm     
	c 	588?? 	X\]`bt]tXu 	 !//1 	BLD&'fjA	B r)   c           	      T   t        | j                  j                               }t        |t              r,t        |      |k7  rt        dt        |       d| d| d      dt        dt        j                  j                  ffd| j                         D ]  \  }} |||        y)	a4  
        Sets the attention processor to use to compute attention.

        Parameters:
            processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
                The instantiated processor class or a dictionary of processor classes that will be set as the processor
                for **all** `Attention` layers.

                If `processor` is a dict, the key needs to define the path to the corresponding cross attention
                processor. This is strongly recommended when setting trainable attention processors.

        z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.re   rf   c                     t        |d      rEt        |t              s|j                  |       n#|j                  |j	                  |  d             |j                         D ]  \  }} |  d| ||        y )Nset_processorrj   rk   )rl   
isinstancedictrv   poprm   )re   rf   	processorrn   ro   fn_recursive_attn_processors        r*   r{   zKFluxControlNetModel.set_attn_processor.<locals>.fn_recursive_attn_processor   sx    v/!)T2((3(($z7J)KL#)#8#8#: T%+tfAhZ,@%STr)   N)rV   rs   keysrw   rx   
ValueErrorrq   r%   rM   rr   rm   )r_   rz   countre   rf   r{   s        @r*   set_attn_processorz&FluxControlNetModel.set_attn_processor   s     D((--/0i&3y>U+BPQTU^Q_P` a005w6QRWQXXkm 
	Tc 	T588?? 	T !//1 	ALD&'fi@	Ar)   c                     t        |j                        }||d<   ||d<   ||d<   ||d<   | j                  |      }|rP|j                  j	                  |j                  j                                |j                  j	                  |j                  j                                |j                  j	                  |j                  j                                |j                  j	                  |j                  j                                |j                  j	                  |j                  j                         d       |j                  j	                  |j                  j                         d       t        |j                        |_        |S )Nr7   r8   r9   r:   F)strict)rx   configfrom_configrK   load_state_dict
state_dictrL   rO   rP   rS   rT   r   r]   )	clstransformerr7   r8   r9   r:   load_weights_from_transformerr   
controlnets	            r*   from_transformerz$FluxControlNetModel.from_transformer   sN    k(())|&7"#'9#$(;$%__V,
(  001F1F1Q1Q1ST&&66{7R7R7]7]7_`''778T8T8_8_8ab!!11+2H2H2S2S2UV))99+:X:X:c:c:ens9t00@@55@@B5 A  0;:;[;[/\J,r)   hidden_statescontrolnet_condcontrolnet_modeconditioning_scaleencoder_hidden_statespooled_projectionstimestepimg_idstxt_idsguidancejoint_attention_kwargsreturn_dictreturnc                    |#|j                         }|j                  dd      }nd}t        rt        | |       n)|'|j	                  dd      t
        j                  d       | j                  |      }| j                  | j                  |      }|j                  \  }}}}|| j                  j                  z  }|| j                  j                  z  }|j                  |||| j                  j                  || j                  j                        }|j                  ddddd	d
      }|j                  |||z  d      }|| j                  |      z   }|j                  |j                         dz  }|
|
j                  |j                         dz  }
nd}
|
| j#                  ||      n| j#                  ||
|      }| j%                  |      }|	j&                  d	k(  rt
        j                  d       |	d   }	|j&                  d	k(  rt
        j                  d       |d   }| j(                  rS|t+        d      | j-                  |      }t/        j0                  ||gd      }t/        j0                  |	dd |	gd      }	t/        j0                  |	|fd      }| j3                  |      }d}t5        | j6                        D ]S  \  }}t/        j8                         r%| j:                  r| j=                  |||||      \  }}n |||||      \  }}||fz   }U d}t5        | j>                        D ]S  \  }}t/        j8                         r%| j:                  r| j=                  |||||      \  }}n |||||      \  }}||fz   }U d}tA        || jB                        D ]  \  }} ||      }||fz   } d}tA        || jD                        D ]  \  } } ||       } || fz   } |D !cg c]  }!|!|z  	 }}!|D !cg c]  }!|!|z  	 }}!tG        |      dk(  rdn|}tG        |      dk(  rdn|}t        rtI        | |       |s||fS tK        ||      S c c}!w c c}!w )a  
        The [`FluxTransformer2DModel`] forward method.

        Args:
            hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`):
                Input `hidden_states`.
            controlnet_cond (`torch.Tensor`):
                The conditional input tensor of shape `(batch_size, sequence_length, hidden_size)`.
            controlnet_mode (`torch.Tensor`):
                The mode tensor of shape `(batch_size, 1)`.
            conditioning_scale (`float`, defaults to `1.0`):
                The scale factor for ControlNet outputs.
            encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`):
                Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
            pooled_projections (`torch.FloatTensor` of shape `(batch_size, projection_dim)`): Embeddings projected
                from the embeddings of input conditions.
            timestep ( `torch.LongTensor`):
                Used to indicate denoising step.
            block_controlnet_hidden_states: (`list` of `torch.Tensor`):
                A list of tensors that if specified are added to the residuals of transformer blocks.
            joint_attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                `self.processor` in
                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain
                tuple.

        Returns:
            If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
            `tuple` where the first element is the sample tensor.
        Nscale      ?z\Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective.r   r      r-   r
      i  zrPassing `txt_ids` 3d torch.Tensor is deprecated.Please remove the batch dimension and pass it as a 2d torch TensorzrPassing `img_ids` 3d torch.Tensor is deprecated.Please remove the batch dimension and pass it as a 2d torch TensorzA`controlnet_mode` cannot be `None` when applying ControlNet-Union)rE   r(   )r   r   tembimage_rotary_emb)r    r!   )&copyry   r   r   getloggerwarningrP   r\   shaper   r5   reshapepermuter]   todtyperL   rO   ndimrY   r}   r[   r%   catrK   	enumeraterS   is_grad_enabledr^   _gradient_checkpointing_funcrT   ziprU   rX   rV   r   r   )"r_   r   r   r   r   r   r   r   r   r   r   r   r   
lora_scale
batch_sizechannels	height_pwwidth_pwheightwidthr   controlnet_mode_embidsr   block_samplesindex_blockblocksingle_block_samplesr    block_samplecontrolnet_blockr!   single_block_samplesamples"                                     r*   forwardzFluxControlNetModel.forward   s   ^ "-%;%@%@%B"/33GSAJJdJ/%16L6P6PQXZ^6_6kr 6  ,"33ODO8G8M8M5J)X$++"8"88F 6 66E-55Hfdkk.D.DeT[[McMcO .55aAq!QGO-55j&5.RTUO%(B(B?(SS;;}223d:{{=#6#67$>HH    +=>%%h:LM 	
 !% 5 56K L<<1NNU ajG<<1NNU ajG::& !dee"&"?"?"P$)II/BDY.Z`a$b!ii!g 6A>Gii'*2>>#."+D,C,C"D 	=K$$&4+F+F7;7X7X!)$84%} 8="/*?%5	84%} *],<<M#	=&  ""+D,J,J"K 	KK$$&4+F+F7;7X7X!)$84%} 8="/*?%5	84%} $8=:J#J #	K( $& .1-AWAW.X 	R*L*+L9L'?</'Q$	R +-'589MtOlOl5m 	g1!1"23F"G.MQdPf.f+	g
 Og#gFF-?$?#g #gUt*u664F+F*u'*u+./G+HA+M4Sk 78A=DCb 	( j1,.MNN#%=,K
 	
 $h*us   !Q
3Q)r   
   r1   r2   T)
Nr   NNNNNNNT)r"   r#   r$    _supports_gradient_checkpointingr   intboolr   rH   propertyrs   r   classmethodr   r%   r&   float
LongTensorr   r   rq   r   r	   FloatTensorr   r   __classcell__rc   s   @r*   r,   r,   )   s*   '+$ !#"%#%#'%( %$0/3J,J, J, 	J,
 J,  J, !J, !J,  #J, J, S	J, J, *-J, J,X  0 AD  !#"%#%&*  	
   ! F )-$'.2+/%) $ $!%;? w
||w
 w
 	w

 "w
  %||w
 "LLw
 ""w
 w
 w
 ,,w
 !)c3h 8w
 w
 
u  "::	;w
r)   r,   c                   P    e Zd ZdZ fdZ	 	 	 	 	 	 	 	 ddej                  deej                     deej                     dee	   dej                  dej                  d	ej                  d
ej                  dej                  dej                  deeeef      dedeeef   fdZ xZS )FluxMultiControlNetModela  
    `FluxMultiControlNetModel` wrapper class for Multi-FluxControlNetModel

    This module is a wrapper for multiple instances of the `FluxControlNetModel`. The `forward()` API is designed to be
    compatible with `FluxControlNetModel`.

    Args:
        controlnets (`List[FluxControlNetModel]`):
            Provides additional conditioning to the unet during the denoising process. You must set multiple
            `FluxControlNetModel` as a list.
    c                 V    t         |           t        j                  |      | _        y )N)rG   rH   rM   rQ   nets)r_   controlnetsrc   s     r*   rH   z!FluxMultiControlNetModel.__init__  s    MM+.	r)   r   r   r   r   r   r   r   r   r   r   r   r   r   c                     t        | j                        dk(  r| j                  d   }t        t        |||            D ]}  \  }\  }}} ||||d d d f   |||
|||	|||      \  }}|dk(  r|}|}2|#!t        ||      D cg c]
  \  }}||z    }}}|Z]t        ||      D cg c]
  \  }}||z    }}} fS t        t        |||| j                              D ]~  \  }\  }}}} ||||d d d f   |||
|||	|||      \  }}|dk(  r|}|}3|#!t        ||      D cg c]
  \  }}||z    }}}|[^t        ||      D cg c]
  \  }}||z    }}} fS c c}}w c c}}w c c}}w c c}}w )Nr-   r   )r   r   r   r   r   r   r   r   r   r   r   r   )rV   r   r   r   )r_   r   r   r   r   r   r   r   r   r   r   r   r   r   ra   imagemoder   r   r   control_block_samplescontrol_single_block_samplescontrol_block_sampler   control_single_block_samples                            r*   r   z FluxMultiControlNetModel.forward  sk   " tyy>Q1J+4S/[m5n+o  ''E46@"/$)$(DM',%%'9*?##+A +733  6,9)3G0$05J5V GJJ_anFo1 B 4l 1<?1- 1 ,7<X<d NQ <>RN8 I ;\ 8,F84 87 R %&BBBI 8AO_6H$))T8 "33E4
 7A"/$)$(DM',%%'9*?##+A +733  6,9)3G0$05J5V GJJ_anFo1 B 4l 1<?1- 1 ,7<X<d NQ <>RN8 I ;\ 8,F84 8;"H %&BBBg1
8D1
8s   E(*E.)E4E:)NNNNNNNT)r"   r#   r$   __doc__rH   r%   r   r   tensorr   r&   r   r   r   rq   r   r   r	   r   r   r   r   r   s   @r*   r   r     s   
/ /3+/%) $ $!%;? ]C((]C ell+]C ell+	]C
 !K]C  %||]C "LL]C ""]C ]C ]C ,,]C !)c3h 8]C ]C 
#U*	+]Cr)   r   )-dataclassesr   typingr   r   r   r   r   r	   r%   torch.nnrM   configuration_utilsr   r   loadersr   utilsr   r   r   r   r   attention_processorr   controlnets.controlnetr   r   
embeddingsr   r   r   modeling_outputsr   modeling_utilsr   transformers.transformer_fluxr   r   
get_loggerr"   r   r   r,   r   r(   r)   r*   <module>r      s    " : :   B ' b b 4 Q u u 7 ' \ 
		H	% 9: 9 9
c
*k3C c
LnCz nCr)   