
    biK                       d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	m
c mZ d dlm
Z
 ddlmZ ddlmZmZmZ ddlmZmZmZ ddlmZmZ  ej4                  e      Z e       rd dlZ e       r	d dlZd dlZndZ e       r ed	d
      rd dl m!Z! d dl"m#Z# dZ$ndZ$e G d de
jJ                               Z& G d de
jJ                        Z' G d de
jJ                        Z( G d de
jJ                        Z) G d d      Z* G d d      Z+ G d de
jJ                        Z, G d d      Z- G d d       Z. G d! d"      Z/ G d# d$      Z0 G d% d&      Z1 G d' d(      Z2 G d) d*      Z3 G d+ d,      Z4 G d- d.      Z5 G d/ d0      Z6 G d1 d2      Z7 G d3 d4      Z8 G d5 d6      Z9 G d7 d8      Z: G d9 d:      Z; G d; d<      Z< G d= d>      Z= G d? d@      Z> G dA dB      Z? G dC dD      Z@ G dE dF      ZA G dG dH      ZB G dI dJ      ZC G dK dL      ZD G dM dN      ZE G dO dPe
jJ                        ZF G dQ dRe
jJ                        ZG G dS dT      ZH G dU dV      ZI G dW dXe
jJ                        ZJ G dY dZe
jJ                        ZK G d[ d\ej                  jJ                        ZL G d] d^ej                  jJ                        ZM G d_ d`ej                  jJ                        ZN G da db      ZO G dc dd      ZP G de df      ZQ G dg dh      ZR G di dj      ZS G dk dl      ZT G dm dn      ZU G do dp      ZV G dq dr      ZW G ds dt      ZX G du dv      ZY G dw dx      ZZ G dy dz      Z[ G d{ d|      Z\ G d} d~      Z] G d d      Z^ G d d      Z_e-eIe.e9fZ`e+e<e:eHeKeLe\fZaeg e+e,e-e.e/e0e1e2e4e5e6eYe]e[e^e7e8e9e:e=e;e<e>e*e?e@eAeBeCeDeEeFeGeHeIeVeWeXe(eQe'eKeLeMeNeOePeReSeTeU   Zby)    N)CallableListOptionalTupleUnion)nn   )IPAdapterMaskProcessor)	deprecateis_torch_xla_availablelogging)is_torch_npu_availableis_torch_xla_versionis_xformers_available)is_torch_versionmaybe_allow_in_graph>z2.2)flash_attention)is_spmdTFc            9           e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d@dedee   dedee   deded	ed
ededee   dedee   dee   dee   dee   dee   dedededededededed   dedededef8 fd Z		 	 dAd!ed"ee
ee   d#f      d$dfd%Zd&ed$dfd'Z	 dBd(ed)ee   d$dfd*Zd+ed$dfd,ZdCd-ZdDd.ed$d/fd0Z	 	 dEd1ej&                  d2eej&                     d3eej&                     d$ej&                  fd4Zd5ej&                  d$ej&                  fd6ZdFd5ej&                  ded$ej&                  fd7Z	 dBd8ej&                  d9ej&                  d3eej&                     d$ej&                  fd:Z	 dFd3ej&                  d;ed<eded$ej&                  f
d=Zd2ej&                  d$ej&                  fd>Z ej4                         dGd?       Z xZS )H	Attentiona  
    A cross attention layer.

    Parameters:
        query_dim (`int`):
            The number of channels in the query.
        cross_attention_dim (`int`, *optional*):
            The number of channels in the encoder_hidden_states. If not given, defaults to `query_dim`.
        heads (`int`,  *optional*, defaults to 8):
            The number of heads to use for multi-head attention.
        kv_heads (`int`,  *optional*, defaults to `None`):
            The number of key and value heads to use for multi-head attention. Defaults to `heads`. If
            `kv_heads=heads`, the model will use Multi Head Attention (MHA), if `kv_heads=1` the model will use Multi
            Query Attention (MQA) otherwise GQA is used.
        dim_head (`int`,  *optional*, defaults to 64):
            The number of channels in each head.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
        bias (`bool`, *optional*, defaults to False):
            Set to `True` for the query, key, and value linear layers to contain a bias parameter.
        upcast_attention (`bool`, *optional*, defaults to False):
            Set to `True` to upcast the attention computation to `float32`.
        upcast_softmax (`bool`, *optional*, defaults to False):
            Set to `True` to upcast the softmax computation to `float32`.
        cross_attention_norm (`str`, *optional*, defaults to `None`):
            The type of normalization to use for the cross attention. Can be `None`, `layer_norm`, or `group_norm`.
        cross_attention_norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups to use for the group norm in the cross attention.
        added_kv_proj_dim (`int`, *optional*, defaults to `None`):
            The number of channels to use for the added key and value projections. If `None`, no projection is used.
        norm_num_groups (`int`, *optional*, defaults to `None`):
            The number of groups to use for the group norm in the attention.
        spatial_norm_dim (`int`, *optional*, defaults to `None`):
            The number of channels to use for the spatial normalization.
        out_bias (`bool`, *optional*, defaults to `True`):
            Set to `True` to use a bias in the output linear layer.
        scale_qk (`bool`, *optional*, defaults to `True`):
            Set to `True` to scale the query and key by `1 / sqrt(dim_head)`.
        only_cross_attention (`bool`, *optional*, defaults to `False`):
            Set to `True` to only use cross attention and not added_kv_proj_dim. Can only be set to `True` if
            `added_kv_proj_dim` is not `None`.
        eps (`float`, *optional*, defaults to 1e-5):
            An additional value added to the denominator in group normalization that is used for numerical stability.
        rescale_output_factor (`float`, *optional*, defaults to 1.0):
            A factor to rescale the output by dividing it with this value.
        residual_connection (`bool`, *optional*, defaults to `False`):
            Set to `True` to add the residual connection to the output.
        _from_deprecated_attn_block (`bool`, *optional*, defaults to `False`):
            Set to `True` if the attention block is loaded from a deprecated state dict.
        processor (`AttnProcessor`, *optional*, defaults to `None`):
            The attention processor to use. If `None`, defaults to `AttnProcessor2_0` if `torch 2.x` is used and
            `AttnProcessor` otherwise.
    N	query_dimcross_attention_dimheadskv_headsdim_headdropoutbiasupcast_attentionupcast_softmaxcross_attention_normcross_attention_norm_num_groupsqk_normadded_kv_proj_dimadded_proj_biasnorm_num_groupsspatial_norm_dimout_biasscale_qkonly_cross_attentionepsrescale_output_factorresidual_connection_from_deprecated_attn_block	processorAttnProcessorout_dimout_context_dimelementwise_affine	is_causalc                    t         #|           ddlm}m} m}! ||n||z  | _        || j                  n||z  | _        || _        || _	        |d u| _
        ||n|| _        || _        |	| _        || _        || _        || _        d| _        ||n|| _        ||n|| _        || _        || _        || _        || _        || _        | j0                  r|dz  nd| _        |||z  n|| _        || _        || _        || _        | j8                  | j:                  rt=        d      |t?        j@                  |||d      | _!        nd | _!        |tE        ||	      | _#        nd | _#        |d | _$        d | _%        n1|d
k(  r;t?        jL                  |||      | _$        t?        jL                  |||      | _%        n|dk(  r# ||dd|      | _$         ||dd|      | _%        n|dk(  r?t?        jL                  ||z  |      | _$        t?        jL                  ||z  |      | _%        n|dk(  r! |!|||      | _$         |!|||      | _%        n_|dk(  r% |!||z  |      | _$         |!||z  |      | _%        n5|dk(  r! | dd|      | _$         | dd|      | _%        nt=        d| d      |
d | _'        nx|
d
k(  r%t?        jL                  | j                        | _'        nN|
dk(  r:| j8                  |}"n| j                  }"t?        j@                  |"|dd      | _'        nt=        d|
 d      t?        jP                  || j                  |      | _)        | j:                  sct?        jP                  | j                  | j                  |      | _*        t?        jP                  | j                  | j                  |      | _+        nd | _*        d | _+        || _,        | j8                  t?        jP                  || j                  |      | _-        t?        jP                  || j                  |      | _.        | j(                  =t?        jP                  || j                  |      | _/        nd | _/        d | _-        d | _.        | j*                  st?        j`                  g       | _1        | jb                  je                  t?        jP                  | j                  | j$                  |             | jb                  je                  t?        jf                  |             nd | _1        | j(                  >| j(                  s2t?        jP                  | j                  | j&                  |      | _4        nd | _4        |||d
k(  r;t?        jL                  |||      | _5        t?        jL                  |||      | _6        n|dk(  r# ||dd|      | _5         ||dd|      | _6        n`|dk(  r |!||      | _5         |!||      | _6        n<|dk(  rd | _5         |!||z  |      | _6        nt=        d| d      d | _5        d | _6        |0to        tp        d      r| j0                  r
ts               n	tu               }| jw                  |       y )N   )FP32LayerNormLpNormRMSNormFg            ?z`only_cross_attention` can only be set to True if `added_kv_proj_dim` is not None. Make sure to set either `only_cross_attention=False` or define `added_kv_proj_dim`.Tnum_channels
num_groupsr+   affine)
f_channelszq_channels
layer_norm)r+   r3   fp32_layer_norm)r3   r   r+   layer_norm_across_heads)r+   rms_normrms_norm_across_headsl2r	   )pdimr+   zunknown qk_norm: z. Should be one of None, 'layer_norm', 'fp32_layer_norm', 'layer_norm_across_heads', 'rms_norm', 'rms_norm_across_heads', 'l2'.
group_normh㈵>zunknown cross_attention_norm: z.. Should be None, 'layer_norm' or 'group_norm'r   zC. Should be one of `None,'layer_norm','fp32_layer_norm','rms_norm'`scaled_dot_product_attention)<super__init__normalizationr7   r8   r9   	inner_diminner_kv_dimr   use_biasis_cross_attentionr   r   r    r,   r-   r   fused_projectionsr1   r2   context_pre_onlypre_onlyr4   r.   r)   scaler   sliceable_head_dimr$   r*   
ValueErrorr   	GroupNormrJ   SpatialNormspatial_normnorm_qnorm_k	LayerNorm
norm_crossLinearto_qto_kto_vr%   
add_k_proj
add_v_proj
add_q_proj
ModuleListto_outappendDropout
to_add_outnorm_added_qnorm_added_khasattrFAttnProcessor2_0r0   set_processor)$selfr   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r1   r2   rV   rW   r3   r4   r7   r8   r9   norm_cross_num_channels	__class__s$                                      _/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/attention_processor.pyrO   zAttention.__init__i   s+   B 	 	BA$+$7X=M.6.>DNNHxDW""5T"A:M:Y#6_h  0,%:"#6 !&")"5w92A2MS\ 0 " ,G( '+}}Xt^#
,3,?W(U
 #(!2$8!!!)d.G.G y  & ll	ocfostDO"DO' +yN^ _D $D?DKDK$,,xSM_`DK,,xSM_`DK))'UQV\_`DK'UQV\_`DK11,,x%'7SADK,,x(':DDK
"!(HZ[DK!(HZ[DK//!(U"2<DK!(X"53?DK_ 1"#6DK 1"#6DK#G9  -l  m   '"DO!\1 ll4+C+CDDO!\1%%1 +<'*.*B*B' ll4A`fjswDO 01E0FFtu  IIidC	((		$":":D<M<MTXYDI		$":":D<M<MTXYDIDIDI.!!- ii(94;L;LSbcDO ii(94;L;LSbcDO$$0"$)),=t~~Tc"d"DO"DO"DO}}--+DKKKryyHUVKKrzz'23DK  ,T5J5J ii8L8LS[\DO"DO#4#@,&$&LLsWi$j!$&LLsWi$j!--$1(u[`fi$j!$1(u[`fi$j!J&$+H#$>!$+H#$>!33 %)!$+Hx,?S$I! 'y0st  !%D $D &-a1O&PUYUbUb "huhw  	9%    use_xla_flash_attentionpartition_spec.returnc                    |rHt         sdt        dd      rdt               rt        dd      rd|rt        |      }n<t	        |      }n0t        t        d      r| j                  r
t               n	t               }| j                  |       y)	ak  
        Set whether to use xla flash attention from `torch_xla` or not.

        Args:
            use_xla_flash_attention (`bool`):
                Whether to use pallas flash attention kernel from `torch_xla` or not.
            partition_spec (`Tuple[]`, *optional*):
                Specify the partition specification if using SPMD. Otherwise None.
        ztorch_xla is not available<2.3zEflash attention pallas kernel is supported from torch_xla version 2.32.4zPflash attention pallas kernel using SPMD is supported from torch_xla version 2.4rM   N)r   r   r   XLAFluxFlashAttnProcessor2_0XLAFlashAttnProcessor2_0rp   rq   r)   rr   r0   rs   )rt   ry   rz   is_fluxr/   s        rw   set_use_xla_flash_attentionz%Attention.set_use_xla_flash_attention5  s     #)22%c51]]3C?hh <^ LI 8 HI '.a1O&PUYUbUb "huhw  	9%rx   use_npu_flash_attentionc                     |rt               }n0t        t        d      r| j                  r
t	               n	t               }| j                  |       y)zR
        Set whether to use npu flash attention from `torch_npu` or not.

        rM   N)AttnProcessorNPUrp   rq   r)   rr   r0   rs   )rt   r   r/   s      rw   set_use_npu_flash_attentionz%Attention.set_use_npu_flash_attentionV  sF    
 #(*I '.a1O&PUYUbUb "huhw  	9%rx   'use_memory_efficient_attention_xformersattention_opc                     t        | d      xr% t        | j                  t        t        t
        f      }t        | d      xr* t        | j                  t        t        t        t        f      }t        | d      xr% t        | j                  t        t        t        f      }t        | d      xr  t        | j                  t        t        f      }|r|r|rt        d| j                         t!               st#        dd      t$        j&                  j)                         st+        d      	 d}||\  }}	|j,                  ^}}
t%        j.                  dd	|
      }t0        j2                  j5                  |||      }
|rt	        | j                  j8                  | j                  j:                  | j                  j<                  | j                  j>                  |      }|jA                  | j                  jC                                t        | j                  d      r|jE                  | j                  jF                  jH                  jJ                         n|r#tL        jO                  d       t        |      }n\|rt        | j                  j<                  | j                  j>                  | j                  jP                  | j                  jR                  |      }|jA                  | j                  jC                                t        | j                  d      r|jE                  | j                  jT                  d   jH                  jJ                  | j                  jT                  d   jH                  jV                  
       nN|rt        |      }n>tY        |      }n0|rt        tZ        d      rt
        nt        } || j                  j8                  | j                  j:                  | j                  j<                  | j                  j>                        }|jA                  | j                  jC                                t        | j                  d      rv|jE                  | j                  jF                  jH                  jJ                         n;|rt        | j                  j<                  | j                  j>                  | j                  jP                  | j                  jR                        }|jA                  | j                  jC                                t        | j                  d      r|jE                  | j                  jT                  d   jH                  jJ                  | j                  jT                  d   jH                  jV                  
       n0t        tZ        d      r| j\                  r
t_               n	ta               }| jc                  |       y# t6        $ r}|d}~ww xY w)a  
        Set whether to use memory efficient attention from `xformers` or not.

        Args:
            use_memory_efficient_attention_xformers (`bool`):
                Whether to use memory efficient attention from `xformers` or not.
            attention_op (`Callable`, *optional*):
                The attention operation to use. Defaults to `None` which uses the default attention operation from
                `xformers`.
        r/   zhMemory efficient attention is currently not supported for custom diffusion for attention processor type zeRefer to https://github.com/facebookresearch/xformers for more information on how to install xformersxformers)namezvtorch.cuda.is_available() should be True but is False. xformers' memory efficient attention is only available for GPU N)r6   r	   (   cudadevicedtype)train_kvtrain_q_outhidden_sizer   r   to_k_custom_diffusionzMemory efficient attention with `xformers` might currently not work correctly if an attention mask is required for the attention operation.r   )r   r   
num_tokensrX   r   to_k_ipr   rM   )r   r   r   r   )r   r   r   rX   )2rp   
isinstancer/   CustomDiffusionAttnProcessor$CustomDiffusionXFormersAttnProcessorCustomDiffusionAttnProcessor2_0AttnAddedKVProcessorAttnAddedKVProcessor2_0SlicedAttnAddedKVProcessorXFormersAttnAddedKVProcessorIPAdapterAttnProcessorIPAdapterAttnProcessor2_0IPAdapterXFormersAttnProcessorJointAttnProcessor2_0XFormersJointAttnProcessorNotImplementedErrorr   ModuleNotFoundErrortorchr   is_availablerZ   SUPPORTED_DTYPESrandnr   opsmemory_efficient_attention	Exceptionr   r   r   r   load_state_dict
state_dicttor   weightr   loggerinfor   rX   r   r   XFormersAttnProcessorrq   r)   rr   r0   rs   )rt   r   r   is_custom_diffusionis_added_kv_processoris_ip_adapteris_joint_processorr   op_fwop_bw_qer/   attn_processor_classs                  rw   +set_use_memory_efficient_attention_xformersz5Attention.set_use_memory_efficient_attention_xformersg  s    &dK8 
ZNN)+OQpq>
 !(k : !
zNN$'*,	@
  k2 
zNN#%>@^_8
 %T;7 
JNN%*=
 3$)<)~  @D  @N  @N  O  P  )*)$ $  ZZ,,. / 
	 E#/'3u$)$:$:	JvUKA ??1aHA #@!^^44 $ : : $ : :(,(J(J!-	 ))$..*C*C*EF4>>+BCLL!E!E!L!L!S!ST&
  b 9lS	: $ : :(,(J(J#~~88....!-	 ))$..*C*C*EF4>>95LL#~~55a8??FFdnnNdNdefNgNnNnNtNt !  $6LQ	1|L	" q"@A 45 %
 1!^^44 $ : : $ : :(,(J(J		 ))$..*C*C*EF4>>+BCLL!E!E!L!L!S!ST5 $ : :(,(J(J#~~88....		 ))$..*C*C*EF4>>95LL#~~55a8??FFdnnNdNdefNgNnNnNtNt !  q"@Admm %&&  	9%_ ! Gs   -AW- -	W=6W88W=
slice_sizec                 ^   |+|| j                   kD  rt        d| d| j                    d      || j                  t        |      }nU|t	        |      }nG| j                  t               }n0t        t        d      r| j                  r
t               n	t               }| j                  |       y)z
        Set the slice size for attention computation.

        Args:
            slice_size (`int`):
                The slice size for attention computation.
        Nzslice_size z has to be smaller or equal to .rM   )rY   rZ   r$   r   SlicedAttnProcessorr   rp   rq   r)   rr   r0   rs   )rt   r   r/   s      rw   set_attention_slicezAttention.set_attention_slice  s     !j43J3J&J{:,6UVZVmVmUnnopqq!d&<&<&H2:>I#+J7I##/,.I '.a1O&PUYUbUb "huhw  	9%rx   c                 N   t        | d      rt        | j                  t        j                  j
                        rdt        |t        j                  j
                        s@t        j                  d| j                   d|        | j                  j                  d       || _        y)z
        Set the attention processor to use.

        Args:
            processor (`AttnProcessor`):
                The attention processor to use.
        r/   z-You are removing possibly trained weights of z with N)
rp   r   r/   r   r   Moduler   r   _modulespop)rt   r/   s     rw   rs   zAttention.set_processor  sq     D+&4>>588??;y%((//:KKGGWW]^g]hijMMk*"rx   return_deprecated_loraAttentionProcessorc                      |s| j                   S y)a7  
        Get the attention processor in use.

        Args:
            return_deprecated_lora (`bool`, *optional*, defaults to `False`):
                Set to `True` to return the deprecated LoRA attention processor.

        Returns:
            "AttentionProcessor": The attention processor in use.
        Nr/   )rt   r   s     rw   get_processorzAttention.get_processor)  s     &>>! &rx   hidden_statesencoder_hidden_statesattention_maskc                    t        t        j                  | j                  j                        j
                  j                               }ddh}|j                         D cg c]  \  }}||vs||vs| }	}}t        |	      dkD  r:t        j                  d|	 d| j                  j                  j                   d       |j                         D 
ci c]  \  }}
||v s||
 }}}
 | j                  | |f||d|S c c}}w c c}
}w )ah  
        The forward method of the `Attention` class.

        Args:
            hidden_states (`torch.Tensor`):
                The hidden states of the query.
            encoder_hidden_states (`torch.Tensor`, *optional*):
                The hidden states of the encoder.
            attention_mask (`torch.Tensor`, *optional*):
                The attention mask to use. If `None`, no mask is applied.
            **cross_attention_kwargs:
                Additional keyword arguments to pass along to the cross attention.

        Returns:
            `torch.Tensor`: The output of the attention layer.
        ip_adapter_masksip_hidden_statesr   zcross_attention_kwargs z are not expected by z and will be ignored.r   r   )setinspect	signaturer/   __call__
parameterskeysitemslenr   warningrv   __name__)rt   r   r   r   cross_attention_kwargsattn_parametersquiet_attn_parameterskr   unused_kwargsws              rw   forwardzAttention.forward7  s(   6 g//0G0GHSSXXZ[!35G H0668
!QA_<TYZbwYwA
 
 }!NN)-8MdnnNfNfNoNoMp  qF  G 4J3O3O3Q!j41aUVZiUi!Q$!j!jt~~
 #8)	

 %
 	

 "ks   "D/D4DD#Dtensorc                     | j                   }|j                  \  }}}|j                  ||z  |||      }|j                  dddd      j                  ||z  |||z        }|S )ac  
        Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size // heads, seq_len, dim * heads]`. `heads`
        is the number of heads initialized while constructing the `Attention` class.

        Args:
            tensor (`torch.Tensor`): The tensor to reshape.

        Returns:
            `torch.Tensor`: The reshaped tensor.
        r   r	   r6      )r   shapereshapepermute)rt   r   	head_size
batch_sizeseq_lenrI   s         rw   batch_to_head_dimzAttention.batch_to_head_dime  sj     JJ	#)<< 
GS
i 7GSQ1a+33J)4KWVY\eVefrx   c                 "   | j                   }|j                  dk(  r|j                  \  }}}d}n|j                  \  }}}}|j                  |||z  |||z        }|j	                  dddd      }|dk(  r|j                  ||z  ||z  ||z        }|S )a   
        Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size, seq_len, heads, dim // heads]` `heads` is
        the number of heads initialized while constructing the `Attention` class.

        Args:
            tensor (`torch.Tensor`): The tensor to reshape.
            out_dim (`int`, *optional*, defaults to `3`): The output dimension of the tensor. If `3`, the tensor is
                reshaped to `[batch_size * heads, seq_len, dim // heads]`.

        Returns:
            `torch.Tensor`: The reshaped tensor.
        r   r6   r   r	   )r   ndimr   r   r   )rt   r   r1   r   r   r   rI   	extra_dims           rw   head_to_batch_dimzAttention.head_to_batch_dimv  s     JJ	;;!'-||$JI28,,/J	7C
Gi,?CS\L\]1a+a<^^J$:Gi<OQTXaQabFrx   querykeyc                    |j                   }| j                  r |j                         }|j                         }|Xt        j                  |j
                  d   |j
                  d   |j
                  d   |j                   |j                        }d}n|}d}t        j                  |||j                  dd      || j                        }~| j                  r|j                         }|j                  d      }~|j                  |      }|S )ak  
        Compute the attention scores.

        Args:
            query (`torch.Tensor`): The query tensor.
            key (`torch.Tensor`): The key tensor.
            attention_mask (`torch.Tensor`, *optional*): The attention mask to use. If `None`, no mask is applied.

        Returns:
            `torch.Tensor`: The attention probabilities/scores.
        r   r6   r   r   rG   )betaalpharI   )r   r   floatr   emptyr   r   baddbmm	transposerX   r    softmaxr   )	rt   r   r   r   r   baddbmm_inputr   attention_scoresattention_probss	            rw   get_attention_scoreszAttention.get_attention_scores  s       KKME))+C!!KKAA		!EKKX]XdXdM D*MD ==MM"b!**
 /557*22r2:),,U3rx   target_lengthr   c                 r   | j                   }||S |j                  d   }||k7  r|j                  j                  dk(  re|j                  d   |j                  d   |f}t	        j
                  ||j                  |j                        }t	        j                  ||gd      }nt        j                  |d|fd	      }|d
k(  r;|j                  d   ||z  k  r$|j                  |d|j                  d   |z        }|S |dk(  r5|j                  d      }|j                  |d|j                  d   |z        }|S )a  
        Prepare the attention mask for the attention computation.

        Args:
            attention_mask (`torch.Tensor`):
                The attention mask to prepare.
            target_length (`int`):
                The target length of the attention mask. This is the length of the attention mask after padding.
            batch_size (`int`):
                The batch size, which is used to repeat the attention mask.
            out_dim (`int`, *optional*, defaults to `3`):
                The output dimension of the attention mask. Can be either `3` or `4`.

        Returns:
            `torch.Tensor`: The prepared attention mask.
        rG   mpsr   r6   r   r	   r           )valuer   rI   output_size   )r   r   r   typer   zerosr   catrq   padrepeat_interleave	unsqueeze)	rt   r   r  r   r1   r   current_lengthpadding_shapepaddings	            rw   prepare_attention_maskz Attention.prepare_attention_mask  sY   & JJ	!!!,2226]*$$))U2 "0!5!5a!8.:N:Nq:QS` a++m>;O;OXfXmXmn!&NG+D!!L "#~=7IQT!Ua<##A&i)??!/!A!A1.2F2Fq2II2U "B "  \+55a8N+==qn.B.B1.E	.Q > N rx   c                 P   | j                   J d       t        | j                   t        j                        r| j                  |      }|S t        | j                   t        j                        r7|j                  dd      }| j                  |      }|j                  dd      }|S J )aG  
        Normalize the encoder hidden states. Requires `self.norm_cross` to be specified when constructing the
        `Attention` class.

        Args:
            encoder_hidden_states (`torch.Tensor`): Hidden states of the encoder.

        Returns:
            `torch.Tensor`: The normalized encoder hidden states.
        zGself.norm_cross must be defined to call self.norm_encoder_hidden_statesr6   r	   )ra   r   r   r`   r[   r   )rt   r   s     rw   norm_encoder_hidden_statesz$Attention.norm_encoder_hidden_states  s     *u,uu*door||4$(OO4I$J! %$ 6 %:$C$CAq$I!$(OO4I$J!$9$C$CAq$I! %$ 5rx   c                 	   | j                   j                  j                  j                  }| j                   j                  j                  j                  }| j
                  st        j                  | j                   j                  j                  | j                  j                  j                  | j                  j                  j                  g      }|j                  d   }|j                  d   }t        j                  ||| j                  ||      | _        | j                  j                  j                  |       | j                  rt        j                  | j                   j                   j                  | j                  j                   j                  | j                  j                   j                  g      }| j                  j                   j                  |       nCt        j                  | j                  j                  j                  | j                  j                  j                  g      }|j                  d   }|j                  d   }t        j                  ||| j                  ||      | _        | j"                  j                  j                  |       | j                  rxt        j                  | j                  j                   j                  | j                  j                   j                  g      }| j"                  j                   j                  |       t%        | dd       t%        | dd       t%        | dd       t        j                  | j&                  j                  j                  | j(                  j                  j                  | j*                  j                  j                  g      }|j                  d   }|j                  d   }t        j                  ||| j,                  ||      | _        | j.                  j                  j                  |       | j,                  rt        j                  | j&                  j                   j                  | j(                  j                   j                  | j*                  j                   j                  g      }| j.                  j                   j                  |       || _        y )Nr6   r   )r   r   r   rh   rf   rg   )rc   r   datar   r   rT   r   r  rd   re   r   r   rb   rS   to_qkvcopy_r   to_kvgetattrrh   rf   rg   r%   to_added_qkvrU   )rt   fuser   r   concatenated_weightsin_featuresout_featuresconcatenated_biass           rw   fuse_projectionszAttention.fuse_projections  st   !!&&--		  %%++&&#(99dii.>.>.C.CTYYEUEUEZEZ\`\e\e\l\l\q\q-r#s .44Q7K/55a8L ))KDMMZ`hmnDKKK$$%9:}}$)IItyy~~/B/BDIINNDWDWY]YbYbYgYgYlYl.m$n!  &&'89 $)99dii.>.>.C.CTYYEUEUEZEZ-[#\ .44Q7K/55a8L;4==Y_glmDJJJ##$89}}$)IItyy~~/B/BDIINNDWDW.X$Y!

%%&78 D,-9lD1=lD1=#(99'',,doo.D.D.I.I4??KaKaKfKfg$  /44Q7K/55a8L "		\0D0DV[`!D $$**+?@##$)II__))..0D0D0I0I4??K_K_KdKde%! !!&&,,->?!%rx   )N   N@   r	  FFFN    NNTNNTTFrK   r:   FFNNNNFTF)NFN)r/   r0   r{   N)FNN)r   )T)r   
__module____qualname____doc__intr   r   boolstrrO   r   r   r   r   r   r   rs   r   r   Tensorr   r   r   r  r  r  no_gradr&  __classcell__rv   s   @rw   r   r   1   s   4r .2"&!&$.2/1!%+/*.)-*.%*'*$),1/3##'?J&J& &c]J& 	J&
 3-J& J& J& J& J& J& 'smJ& *-J& #J& $C=J& "$J&  "#!J&" #3-#J&$ %J&& 'J&( #)J&* +J&,  %-J&. "/J&0 &*1J&2 O,3J&4 5J&6 7J&< !=J&> ?J&^ ?C	&!%& !x}c'9!:;&
 
&B&4 &D &$ aeP&7;P&KST\K]P&	P&d&c &d &8#("D "EY "" 9=15	,
||,
  (5,
 !.	,
 
,
\  " s 5<< 8 `d-\\-(--FNu||F\-	-` ab1#ll1;>1LO1Z]1	1f% %QVQ]Q] %: U]]_2& 2&rx   r   c                   h     e Zd Zdedededdf fdZdej                  dej                  fdZ xZS )	!SanaMultiscaleAttentionProjectionin_channelsnum_attention_headskernel_sizer{   Nc           	          t         |           d|z  }t        j                  ||||dz  |d      | _        t        j                  ||dddd|z  d      | _        y )Nr   r	   F)r  groupsr   r6   r   )r<  r   )rN   rO   r   Conv2dproj_inproj_out)rt   r8  r9  r:  channelsrv   s        rw   rO   z*SanaMultiscaleAttentionProjection.__init__G  se     	{?yy1$
 		(HaAaJ]F]dijrx   r   c                 J    | j                  |      }| j                  |      }|S r*  )r>  r?  rt   r   s     rw   r   z)SanaMultiscaleAttentionProjection.forwardZ  s$    ]3m4rx   )	r   r,  r-  r/  rO   r   r2  r   r4  r5  s   @rw   r7  r7  F  sN    kk !k 	k
 
k&U\\ ell rx   r7  c                   p    e Zd ZdZ	 	 	 	 	 	 	 ddededee   dedededeed	f   d
ede	f fdZ
dej                  dej                  dej                  dej                  fdZdej                  dej                  dej                  dej                  fdZdej                  dej                  fdZ xZS )SanaMultiscaleLinearAttentionz(Lightweight multi-scale linear attentionr8  out_channelsr9  attention_head_dimmult	norm_typekernel_sizes.r+   r-   c
                    t         |           ddlm}
 || _        || _        || _        |	| _        |t        ||z  |z        n|}||z  }t        j                  ||d      | _        t        j                  ||d      | _        t        j                  ||d      | _        t        j                         | _        |D ](  }| j                  j!                  t#        |||             * t        j$                         | _        t        j                  |dt)        |      z   z  |d      | _         |
||      | _        t/               | _        y )Nr6   )get_normalizationFrL   )num_features)rN   rO   rP   rK  r+   rF  rH  r-   r/  r   rb   rc   rd   re   ri   to_qkv_multiscalerk   r7  ReLUnonlinearityr   rj   norm_outSanaMultiscaleAttnProcessor2_0r/   )rt   r8  rE  r9  rF  rG  rH  rI  r+   r-   rK  rQ   r:  rv   s                rw   rO   z&SanaMultiscaleLinearAttention.__init__c  s*    	 	5"4"#6  >Q=XC11D89^q 	 (*<<	IIk95A	IIk95A	IIk95A	!#' 	K""))1)=PR]^	
 GGIii	Q\1B-B C\X]^)),O79rx   r   r   r
  r{   c                 D   t        j                  |ddd      }t        j                  ||j	                  dd            }t        j                  ||      }|j                  t        j                        }|d d d d d df   |d d d d dd f   | j                  z   z  }|S )Nr   r   r   r6   constantr6   moder
  rG   r   r   )rq   r  r   matmulr   r   float32r+   rt   r   r   r
  scoresr   s         rw   apply_linear_attentionz4SanaMultiscaleLinearAttention.apply_linear_attention  s    e\
!DeS]]2r%:;VU3%((u}}(=%aCRCi0M!Q)4Ltxx4WXrx   c                 @   t        j                  |j                  dd      |      }|j                  t         j                        }|t        j
                  |dd      | j                  z   z  }t        j                  ||j                  |j                              }|S )NrG   r   rW  r	   T)rI   keepdim)r   rX  r   r   rY  sumr+   r   rZ  s         rw   apply_quadratic_attentionz7SanaMultiscaleLinearAttention.apply_quadratic_attention  st    cmmB3U;/599VDADHHLMUFIIekk,BCrx   r   c                 &    | j                  | |      S r*  r   rB  s     rw   r   z%SanaMultiscaleLinearAttention.forward  s    ~~dM22rx   )Nr'  r:   
batch_norm)   V瞯<F)r   r,  r-  r.  r/  r   r   r1  r   r0  rO   r   r2  r\  r`  r   r4  r5  s   @rw   rD  rD  `  s   3 .2"#%(,$)):): ): &c]	):
  ): ): ): CHo): ): "):VELL u|| TYT`T` ejeqeq u|| %,, W\WcWc hmhtht 3U\\ 3ell 3rx   rD  c                        e Zd Z	 	 	 	 	 	 	 	 	 	 ddededddedededed	ed
ee   dee   dededef fdZ	 	 ddej                  deej                     deej                     fdZ
 xZS )MochiAttentionr   r$   r/   MochiAttnProcessor2_0r   r   r   r   r%   r1   r2   r(   rV   r+   c                    t         |           ddlm} |	|	n||z  | _        |	|	n|| _        |
r|
n|| _        || _        |	|	|z  n|| _         |||d      | _	         |||d      | _
         |||d      | _         |||d      | _        t        j                  || j                  |      | _        t        j                  || j                  |      | _        t        j                  || j                  |      | _        t        j                  || j                  |      | _        t        j                  || j                  |      | _        | j                  't        j                  || j                  |      | _        t        j*                  g       | _        | j,                  j/                  t        j                  | j                  | j
                  |             | j,                  j/                  t        j0                  |             | j                  s1t        j                  | j                  | j                  |      | _        || _        y )Nr6   )MochiRMSNormTrL   )rN   rO   rP   ri  rQ   r1   r2   rV   r   r^   r_   rn   ro   r   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   r/   )rt   r   r$   r/   r   r   r   r   r%   r1   r2   r(   rV   r+   ri  rv   s                  rw   rO   zMochiAttention.__init__  s     	/$+$7X=M")"5w92Ay 0,3,?W(U
"8S$7"8S$7(3=(3=IIidC	IIidC	IIidC	))$5t~~O\))$5t~~O\  , ii(94>>P_`DOmmB'299T^^T\\QR2::g./$$ ii8L8LS[\DO"rx   r   r   r   c                 0     | j                   | |f||d|S )Nr   r   )rt   r   r   r   kwargss        rw   r   zMochiAttention.forward  s4     t~~
 #8)	

 
 	
rx   )
r'  r(  r	  FTNNTFrK   r+  )r   r,  r-  r/  r   r0  r   rO   r   r2  r   r4  r5  s   @rw   rf  rf    s      $!%)-!&/#/# /# +	/#
 /# /# /# /# /# #/# "#/# /# /# /#h 9=15	
||
  (5
 !.	
rx   rf  c                       e Zd ZdZd Z	 ddddej                  dej                  dej                  d	eej                     d
ej                  fdZy)rg  z"Attention processor used in Mochi.c                 :    t        t        d      st        d      y )NrM   zUMochiAttnProcessor2_0 requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0.rp   rq   ImportErrorrt   s    rw   rO   zMochiAttnProcessor2_0.__init__      q89uvv :rx   Nattnrf  r   r   r   image_rotary_embr{   c           	      J   |j                  |      }|j                  |      }|j                  |      }|j                  d|j                  df      }|j                  d|j                  df      }|j                  d|j                  df      }|j
                  |j                  |      }|j                  |j                  |      }|j                  |      }	|j                  |      }
|j                  |      }|	j                  d|j                  df      }	|
j                  d|j                  df      }
|j                  d|j                  df      }|j                  |j                  |	      }	|j                  |j                  |
      }
|d } ||g| } ||g| }|j                  dd      |j                  dd      |j                  dd      }}}|	j                  dd      |
j                  dd      |j                  dd      }}
}	|j                  d      }|	j                  d      }||z   }|j                  \  }}}}g }t        |      D ].  }||   d d d f   }t!        j"                  |j%                         d      j%                         }|	||dz   d d |d d f   }|
||dz   d d |d d f   }|||dz   d d |d d f   }t!        j&                  |||dz    |gd      }t!        j&                  |||dz    |gd      }t!        j&                  |||dz    |gd      }t)        j*                  |||dd	      }|j                  d      }t)        j,                  |d
d
d
||z
  f      }|j/                  |       1 t!        j&                  |d
      }|j                  dd      j%                  dd      }|j1                  ||fd      \  }} |j2                  d
   |      } |j2                  d   |      }t5        |d      r|j7                  |      }||fS )Nr	   rG   c                 B   | ddd df   j                         }| ddd df   j                         }||z  ||z  z
  j                  | j                        }||z  ||z  z   j                  | j                        }t        j                  ||gd      j                  d      S )N.r   r	   r6   rG   r   r   )r   r   r   r   stackflatten)x	freqs_cos	freqs_sinx_evenx_oddcossins          rw   apply_rotary_embz8MochiAttnProcessor2_0.__call__.<locals>.apply_rotary_emb  s    319++-#qt!t)**,	)EI,==AA!''J	)EI,==AA!''J{{C:26>>rBBrx   r6   F)as_tupler   r	  	dropout_pr4   r   r   rm   )rc   rd   re   	unflattenr   r^   r_   rh   rf   rg   rn   ro   r   sizer   ranger   nonzerorw  r  rq   rM   r  rk   split_with_sizesrj   rp   rm   ) rt   rr  r   r   r   rs  r   r   r
  encoder_queryencoder_keyencoder_valuer  sequence_lengthencoder_sequence_lengthtotal_lengthr   r   r   rI   attn_outputsidxmaskvalid_prompt_token_indicesvalid_encoder_queryvalid_encoder_keyvalid_encoder_valuevalid_query	valid_keyvalid_valueattn_outputvalid_sequence_lengths                                    rw   r   zMochiAttnProcessor2_0.__call__  sB    		-(ii&		-(DJJ#34mmA

B/0DJJ#34;;"KK&E;;"++c"C(=>oo&;<(=>%//DJJ3CD!++A

B/?@%//DJJ3CD( --m<M(++K8K'C %U>-=>E"3:)9:C!OOAq13==A3FXY[\H]Es##Aq)!!!Q'##Aq) %2{  **Q-"/"4"4Q"7&)@@$)KK!
E1c$ 	-C!#&tQw/D).t||~PU)V)^)^)`&"/cAgqB\^_0_"` +C#'M1>XZ[,[ \"/cAgqB\^_0_"`))U3q%9;N$OUVWK		3sS1W#57H"IqQI))U3q%9;N$OUVWK88YseK %0$4$4Q$7!%%aA|F[7[-\]K,#	-& 		,A6%//15==aC/</M/M56A 0N 0
,,
 'A}5&A}54&$(OO4I$J!333rx   r*  	r   r,  r-  r.  rO   r   r2  r   r    rx   rw   rg  rg    so    ,w 48_4_4 ||_4  %||	_4
 _4 #5<<0_4 
_4rx   rg  c                       e Zd ZdZ	 	 	 d
dedej                  deej                     deej                     deej                     dej                  fd	Zy)r0   zJ
    Default processor for performing attention-related computations.
    Nrr  r   r   r   tembr{   c                 z   t        |      dkD  s|j                  dd       d}t        dd|       |}	|j                  |j                  ||      }|j                  }
|
dk(  r7|j
                  \  }}}}|j                  ||||z        j                  dd      }||j
                  n|j
                  \  }}}|j                  |||      }|j                  1|j                  |j                  dd            j                  dd      }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j!                  |||      }t#        j$                  ||      }|j'                  |      } |j(                  d   |      } |j(                  d   |      }|
dk(  r$|j                  dd	      j+                  |      }|j,                  r||	z   }||j.                  z  }|S )
Nr   rX   The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`.1.0.0r  r6   r	   rG   r   )r   getr   r]   r   r   viewr   r  rJ   rc   ra   r  rd   re   r   r  r   bmmr   rj   r   r-   r,   )rt   rr  r   r   r   r  argsrk  deprecation_messageresidual
input_ndimr   channelheightwidthr  r   r   r   r
  r  s                        rw   r   zAttnProcessor.__call__R  sI    t9q=FJJw5A #Ugw(;< ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ 44^_V`a??& OOM,C,CAq,IJTTUVXYZM		-( ($1!__$($C$CDY$Z!ii-.		/0&&u-$$S)&&u-33E3O		/59..}= 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   NNN	r   r,  r-  r.  r   r   r2  r   r   r  rx   rw   r0   r0   M  s{     9=15'+@@ ||@  (5	@
 !.@ u||$@ 
@rx   r0   c                        e Zd ZdZ	 	 	 	 	 	 ddededee   dee   dedef fdZ	 	 dd	e	d
e
j                  dee
j                     dee
j                     de
j                  f
dZ xZS )r   aK  
    Processor for implementing attention for the Custom Diffusion method.

    Args:
        train_kv (`bool`, defaults to `True`):
            Whether to newly train the key and value matrices corresponding to the text features.
        train_q_out (`bool`, defaults to `True`):
            Whether to newly train query matrices corresponding to the latent image features.
        hidden_size (`int`, *optional*, defaults to `None`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`, *optional*, defaults to `None`):
            The number of channels in the `encoder_hidden_states`.
        out_bias (`bool`, defaults to `True`):
            Whether to include the bias parameter in `train_q_out`.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
    r   r   r   r   r(   r   c                 <   t         |           || _        || _        || _        || _        | j                  rBt        j                  |xs ||d      | _        t        j                  |xs ||d      | _	        | j                  rt        j                  ||d      | _
        t        j                  g       | _        | j                  j                  t        j                  |||             | j                  j                  t        j                  |             y y NFrL   rN   rO   r   r   r   r   r   rb   r   to_v_custom_diffusionto_q_custom_diffusionri   to_out_custom_diffusionrk   rl   rt   r   r   r   r   r(   r   rv   s          rw   rO   z%CustomDiffusionAttnProcessor.__init__       	 &&#6  ==)+3F3U+Wbin)oD&)+3F3U+Wbin)oD&)+;RW)XD&+-==+<D(((//		+{Ya0bc((//

70CD	 rx   rr  r   r   r   r{   c                 2   |j                   \  }}}|j                  |||      }| j                  r?| j                  |      j	                  |j
                  j                  j                        }n>|j                  |j	                  |j
                  j                  j                              }|d}	|}nd}	|j                  r|j                  |      }| j                  r| j                  |j	                  | j                  j                  j                              }
| j                  |j	                  | j                  j                  j                              }|
j	                  |j
                  j                  j                        }
|j	                  |j
                  j                  j                        }n"|j                  |      }
|j                  |      }|	rjt        j                   |
      }|d d d dd d f   dz  |d d d dd d f<   ||
z  d|z
  |
j#                         z  z   }
||z  d|z
  |j#                         z  z   }|j%                  |      }|j%                  |
      }
|j%                  |      }|j'                  ||
|      }t        j(                  ||      }|j+                  |      }| j                  r, | j,                  d   |      } | j,                  d   |      }|S  |j.                  d   |      } |j.                  d   |      }|S )NFTr6   r	  r   )r   r  r   r  r   rc   r   r   ra   r  r   r   r  rd   re   r   	ones_likedetachr   r  r  r   r  rj   )rt   rr  r   r   r   r   r  r   r   	crossattnr   r
  r  r  s                 rw   r   z%CustomDiffusionAttnProcessor.__call__  s    *7)<)<&
OQ44^_V`a..}=@@AQAQAWAWXEIIm..tyy/?/?/E/EFGE (I$1!I(,(G(GH](^%==,,-B-E-EdF`F`FgFgFmFm-noC../D/G/GHbHbHiHiHoHo/pqE&&))//0CHHTYY--334E))12CII34E__S)F%a!Qh/#5F1bqb!83,!f*

!<<CUNa&jELLN%BBE&&u-$$S)&&u-33E3O		/59..}=;D88;MJM;D88;MJM 	 +DKKN=9M*DKKN=9Mrx   TTNNTr	  r+  r   r,  r-  r.  r0  r   r/  r   rO   r   r   r2  r   r4  r5  s   @rw   r   r     s    (  %)-1EE E c]	E
 &c]E E E< 9=1588 ||8  (5	8
 !.8 
8rx   r   c                       e Zd ZdZ	 	 d	dedej                  deej                     deej                     dej                  f
dZy)
r   z
    Processor for performing attention-related computations with extra learnable key and value matrices for the text
    encoder.
    Nrr  r   r   r   r{   c                    t        |      dkD  s|j                  dd       d}t        dd|       |}|j                  |j                  d   |j                  d   d      j                  dd      }|j                  \  }	}
}|j                  ||
|	      }||}n|j                  r|j                  |      }|j                  |j                  dd            j                  dd      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  sw|j                  |      }|j!                  |      }|j                  |      }|j                  |      }t#        j$                  ||gd      }t#        j$                  ||gd      }n|}|}|j'                  |||      }t#        j(                  ||      }|j+                  |      } |j,                  d   |      } |j,                  d   |      }|j                  dd	      j/                  |j                        }||z   }|S )
Nr   rX   r  r  r6   rG   r	   r   r   )r   r  r   r  r   r   r  ra   r  rJ   rc   r   rf   rg   r*   rd   re   r   r  r  r  r   rj   r   )rt   rr  r   r   r   r  rk  r  r  r   r  r   r   encoder_hidden_states_key_proj encoder_hidden_states_value_projr   r
  r  s                     rw   r   zAttnAddedKVProcessor.__call__  sW    t9q=FJJw5A #Ugw(;< %**=+>+>q+A=CVCVWXCY[]^hhijlmn)6)<)<&
OQ44^_V`a ($1!__$($C$CDY$Z!(?(?1(EFPPQRTUV		-(&&u-)-9N)O&+/??;P+Q()-)?)?@^)_&+/+A+ABb+c((())M*CIIm,E((-C**51E));SAqICII?GQOE0C4E33E3O		/59..}= 'A}5&A}5%//B7??O%0rx   r+  r  r  rx   rw   r   r     s^     9=15:: ||:  (5	:
 !.: 
:rx   r   c                       e Zd ZdZd Z	 	 d
dedej                  deej                     deej                     dej                  f
d	Z	y)r   z
    Processor for performing scaled dot-product attention (enabled by default if you're using PyTorch 2.0), with extra
    learnable key and value matrices for the text encoder.
    c                 :    t        t        d      st        d      y )NrM   zWAttnAddedKVProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z AttnAddedKVProcessor2_0.__init__F  "    q89i  :rx   Nrr  r   r   r   r{   c                 
   t        |      dkD  s|j                  dd       d}t        dd|       |}|j                  |j                  d   |j                  d   d      j                  dd      }|j                  \  }	}
}|j                  ||
|	d	      }||}n|j                  r|j                  |      }|j                  |j                  dd            j                  dd      }|j                  |      }|j                  |d	      }|j                  |      }|j                  |      }|j                  |d	      }|j                  |d	      }|j                  s{|j                  |      }|j!                  |      }|j                  |d	      }|j                  |d	      }t#        j$                  ||gd
      }t#        j$                  ||gd
      }n|}|}t'        j(                  ||||dd      }|j                  dd      j+                  |	d|j                  d         } |j,                  d   |      } |j,                  d   |      }|j                  dd      j+                  |j                        }||z   }|S )Nr   rX   r  r  r6   rG   r	   r  )r1   r   r	  F	attn_maskr  r4   r   )r   r  r   r  r   r   r  ra   r  rJ   rc   r   rf   rg   r*   rd   re   r   r  rq   rM   r   rj   )rt   rr  r   r   r   r  rk  r  r  r   r  r   r   r  r  r   r
  s                    rw   r   z AttnAddedKVProcessor2_0.__call__L  s    t9q=FJJw5A #Ugw(;< %**=+>+>q+A=CVCVWXCY[]^hhijlmn)6)<)<&
OQ44^_V`jk4l ($1!__$($C$CDY$Z!(?(?1(EFPPQRTUV		-(&&ua&8)-9N)O&+/??;P+Q()-)?)?@^hi)?)j&+/+A+ABblm+A+n((())M*CIIm,E((a(8C**5!*<E));SAqICII?GQOE0C4E 6633RW
 &//15==j"hnn]^N_` 'A}5&A}5%//B7??O%0rx   r+  
r   r,  r-  r.  rO   r   r   r2  r   r   r  rx   rw   r   r   @  sc    
 9=15== ||=  (5	=
 !.= 
=rx   r   c                       e Zd ZdZd Z	 	 d
dedej                  dej                  deej                     dej                  f
d	Z	y)r   YAttention processor used typically in processing the SD3-like self-attention projections.c                 :    t        t        d      st        d      y )NrM   zUJointAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   zJointAttnProcessor2_0.__init__  rq  rx   Nrr  r   r   r   r{   c                    |}|j                   d   }|j                  |      }	|j                  |      }
|j                  |      }|
j                   d   }||j                  z  }|	j                  |d|j                  |      j                  dd      }	|
j                  |d|j                  |      j                  dd      }
|j                  |d|j                  |      j                  dd      }|j                  |j                  |	      }	|j                  |j                  |
      }
|B|j                  |      }|j                  |      }|j                  |      }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |j                  |      }|j                  |j                  |      }t        j                  |	|gd      }	t        j                  |
|gd      }
t        j                  ||gd      }t!        j"                  |	|
|dd      }|j                  dd      j%                  |d|j                  |z        }|j'                  |	j(                        }|M|d d d |j                   d   f   |d d |j                   d   d f   }}|j*                  s|j-                  |      } |j.                  d   |      } |j.                  d   |      }|||fS |S )	Nr   rG   r6   r	   r   r	  Fr  )r   rc   rd   re   r   r  r   r^   r_   rh   rf   rg   rn   ro   r   r  rq   rM   r   r   r   rV   rm   rj   )rt   rr  r   r   r   r  rk  r  r   r   r   r
  rQ   head_dim encoder_hidden_states_query_projr  r  s                    rw   r   zJointAttnProcessor2_0.__call__  s8    !"((+
 		-(ii&		-(IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP;;"KK&E;;"++c"C !,/3?T/U,-1__=R-S*/3?T/U,/O/T/TB

H0i1o - .L-P-PB

H.i1o + 0P/T/TB

H0i1o -   ,373D3DEe3f0  ,151B1BCa1b.IIu&FGQOE))S"@AqICIIu&FGQOE66uc5TWchi%//15==j"djj[cNcd%((5 , a!48>>!#4!445a!2!445 1M (((,8M(N% 'A}5&A}5 , "777  rx   r+  
r   r,  r-  r.  rO   r   r   FloatTensorr   r   r  rx   rw   r   r     sk    cw 486:L!L! ((L!  %00	L!
 !!2!23L! 
		L!rx   r   c                       e Zd ZdZd Z	 	 d
dedej                  dej                  deej                     dej                  f
d	Z	y)PAGJointAttnProcessor2_0r  c                 :    t        t        d      st        d      y )NrM   zXPAGJointAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z!PAGJointAttnProcessor2_0.__init__  s"    q89j  :rx   Nrr  r   r   r   r{   c                 l   |}|j                   }|dk(  r7|j                  \  }}}	}
|j                  |||	|
z        j                  dd      }|j                   }|dk(  r7|j                  \  }}}	}
|j                  |||	|
z        j                  dd      }|j                  d   }|j	                  d      \  }}|j	                  d      \  }}|j                  d   }|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }t        j                  ||gd      }t        j                  ||gd      }t        j                  ||gd      }|j                  d   }||j                  z  }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }t        j                  |||dd	      }|j                  dd      j!                  |d|j                  |z        }|j#                  |j$                        }|d d d |j                  d   f   |d d |j                  d   d f   }} |j&                  d   |      } |j&                  d   |      }|j(                  s|j+                  |      }|dk(  r$|j                  dd
      j!                  |	
      }|dk(  r$|j                  dd
      j!                  |	
      }|j                  d   }|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }t        j                  ||gd      }t        j                  ||gd      }t        j                  ||gd      }|j                  d   }||j                  z  }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j-                  d      }t        j.                  ||f|j0                  |j$                        } t3        d      | d |d |f<   | d |d |f   j5                  d       | j7                  d      j7                  d      } t        j                  |||| dd      }|j                  dd      j!                  |d|j                  |z        }|j#                  |j$                        }|d d d |j                  d   f   |d d |j                  d   d f   }} |j&                  d   |      } |j&                  d   |      }|j(                  s|j+                  |      }|dk(  r$|j                  dd
      j!                  |	
      }|dk(  r$|j                  dd
      j!                  |	
      }t        j                  ||g      }t        j                  ||g      }||fS )Nr  r6   r	   r   r   rG   r	  Fr  r   r   -infr  )r   r   r  r   chunkrc   rd   re   rh   rf   rg   r   r  r   rq   rM   r   r   r   rj   rV   rm   r  r  r   r   fill_diagonal_r  )!rt   rr  r   r   r   r  r  r   r  r  r  context_input_ndimidentity_block_sizehidden_states_orghidden_states_ptbencoder_hidden_states_orgencoder_hidden_states_ptb	query_orgkey_org	value_org$encoder_hidden_states_org_query_proj"encoder_hidden_states_org_key_proj$encoder_hidden_states_org_value_projrQ   r  	query_ptbkey_ptb	value_ptb$encoder_hidden_states_ptb_query_proj"encoder_hidden_states_ptb_key_proj$encoder_hidden_states_ptb_value_projr   	full_masks!                                    rw   r   z!PAGJointAttnProcessor2_0.__call__  s    !"''
?1>1D1D.J)..z7FUNS]]^_abcM277"1F1L1L.J$9$>$>z7TZ]bTb$c$m$mnoqr$s! ,11!4 0=/B/B1/E,,?T?Z?Z[\?]<!#< /44Q7
 II/0	))-.II/0	 04?X/Y,-1__=V-W*/3?X/Y, IIy*NOUVW	))W&HIqQIIy*NOUVW	MM"%	

*NN:r4::xHRRSTVWX	,,z2tzz8DNNqRSTNN:r4::xHRRSTVWX	::w	SE
 .771=EEjRTVZV`V`ckVkl-00A a!48>>!#4!445a!2!445 5 +DKKN+<=*DKKN+<=$$(,8Q(R%? 1 ; ;B C K KJX_agin o"(A(K(KBPR(S([([GVU)% /44Q7
 II/0	))-.II/0	 04?X/Y,-1__=V-W*/3?X/Y, IIy*NOUVW	))W&HIqQIIy*NOUVW	MM"%	

*NN:r4::xHRRSTVWX	,,z2tzz8DNNqRSTNN:r4::xHRRSTVWX	 ..#KK' 29;K;KS\SbSbc	 AFf	&&&(<)<(<<= 	&&&(<)<(<<=LLQO ''*44Q7	::w	Y#Y^
 .771=EEjRTVZV`V`ckVkl-00A a!48>>!#4!445a!2!445 5 +DKKN+<=*DKKN+<=$$(,8Q(R%? 1 ; ;B C K KJX_agin o"(A(K(KBPR(S([([GVU)%
 		#46G"HI %		+DF_*` a333rx   r+  r  r  rx   rw   r  r    sj    c 486:P4P4 ((P4  %00	P4
 !!2!23P4 
		P4rx   r  c                       e Zd ZdZd Z	 	 d
dedej                  dej                  deej                     dej                  f
d	Z	y)PAGCFGJointAttnProcessor2_0r  c                 :    t        t        d      st        d      y )NrM   z[PAGCFGJointAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z$PAGCFGJointAttnProcessor2_0.__init__  "    q89m  :rx   Nrr  r   r   r   r{   c                    |}|j                   }|dk(  r7|j                  \  }	}
}}|j                  |	|
||z        j                  dd      }|j                   }|dk(  r7|j                  \  }	}
}}|j                  |	|
||z        j                  dd      }|j                  d   }|j	                  d      \  }}}t        j                  ||g      }|j	                  d      \  }}}t        j                  ||g      }|j                  d   }	|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }t        j                  ||gd      }t        j                  ||gd      }t        j                  ||gd      }|j                  d   }||j                  z  }|j                  |	d|j                  |      j                  dd      }|j                  |	d|j                  |      j                  dd      }|j                  |	d|j                  |      j                  dd      }t        j                  |||dd	
      }|j                  dd      j!                  |	d|j                  |z        }|j#                  |j$                        }|d d d |j                  d   f   |d d |j                  d   d f   }} |j&                  d   |      } |j&                  d   |      }|j(                  s|j+                  |      }|dk(  r$|j                  dd      j!                  |	
      }|dk(  r$|j                  dd      j!                  |	
      }|j                  d   }	|j                  |      }|j                  |      }|j                  |      }|j                  |      } |j                  |      }!|j                  |      }"t        j                  || gd      }t        j                  ||!gd      }t        j                  ||"gd      }|j                  d   }||j                  z  }|j                  |	d|j                  |      j                  dd      }|j                  |	d|j                  |      j                  dd      }|j                  |	d|j                  |      j                  dd      }|j-                  d      }#t        j.                  |#|#f|j0                  |j$                        }$t3        d      |$d |d |f<   |$d |d |f   j5                  d       |$j7                  d      j7                  d      }$t        j                  ||||$dd	      }|j                  dd      j!                  |	d|j                  |z        }|j#                  |j$                        }|d d d |j                  d   f   |d d |j                  d   d f   }} |j&                  d   |      } |j&                  d   |      }|j(                  s|j+                  |      }|dk(  r$|j                  dd      j!                  |	
      }|dk(  r$|j                  dd      j!                  |	
      }t        j                  ||g      }t        j                  ||g      }||fS )Nr  r6   r	   r   r   r   rG   r	  Fr  r   r   r  r  )r   r   r  r   r  r   r  rc   rd   re   rh   rf   rg   r   rq   rM   r   r   r   rj   rV   rm   r  r  r   r   r  r  )%rt   rr  r   r   r   r  rk  r  r  r   r  r  r  r  r  hidden_states_uncondr  r  encoder_hidden_states_uncondr  r  r  r  r  r  r  r  rQ   r  r  r  r  r  r  r  r   r  s%                                        rw   r   z$PAGCFGJointAttnProcessor2_0.__call__  s     !"''
?1>1D1D.J)..z7FUNS]]^_abcM277"1F1L1L.J$9$>$>z7TZ]bTb$c$m$mnoqr$s!+11

 FSEXEXYZE[B/1B!II';=N&OP "''*		
(%%$)II/KMf.g$h! /44Q7
 II/0	))-.II/0	 04?X/Y,-1__=V-W*/3?X/Y, IIy*NOUVW	))W&HIqQIIy*NOUVW	MM"%	

*NN:r4::xHRRSTVWX	,,z2tzz8DNNqRSTNN:r4::xHRRSTVWX	::w	SE
 .771=EEjRTVZV`V`ckVkl-00A a!48>>!#4!445a!2!445 5 +DKKN+<=*DKKN+<=$$(,8Q(R%? 1 ; ;B C K KJX_agin o"(A(K(KBPR(S([([GVU)% /44Q7
 II/0	))-.II/0	 04?X/Y,-1__=V-W*/3?X/Y, IIy*NOUVW	))W&HIqQIIy*NOUVW	MM"%	

*NN:r4::xHRRSTVWX	,,z2tzz8DNNqRSTNN:r4::xHRRSTVWX	 ..#KK' 29;K;KS\SbSbc	 AFf	&&&(<)<(<<= 	&&&(<)<(<<=LLQO ''*44Q7	::w	Y#Y^
 .771=EEjRTVZV`V`ckVkl-00A a!48>>!#4!445a!2!445 5 +DKKN+<=*DKKN+<=$$(,8Q(R%? 1 ; ;B C K KJX_agin o"(A(K(KBPR(S([([GVU)%
 		#46G"HI %		+DF_*` a333rx   r+  r  r  rx   rw   r  r  ~  sj    c 486:Y4Y4 ((Y4  %00	Y4
 !!2!23Y4 
		Y4rx   r  c                       e Zd ZdZd Z	 	 d
dedej                  dej                  deej                     dej                  f
d	Z	y)FusedJointAttnProcessor2_0r  c                 :    t        t        d      st        d      y NrM   zPAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z#FusedJointAttnProcessor2_0.__init__&      q89pqq :rx   Nrr  r   r   r   r{   c                    |}|j                   }|dk(  r7|j                  \  }	}
}}|j                  |	|
||z        j                  dd      }|j                   }|dk(  r7|j                  \  }	}
}}|j                  |	|
||z        j                  dd      }|j                  d   }	|j	                  |      }|j                  d   dz  }t        j                  ||d      \  }}}|j                  |      }|j                  d   dz  }t        j                  ||d      \  }}}t        j                  ||gd      }t        j                  ||gd      }t        j                  ||gd      }|j                  d   }||j                  z  }|j                  |	d|j                  |      j                  dd      }|j                  |	d|j                  |      j                  dd      }|j                  |	d|j                  |      j                  dd      }t        j                  |||dd	
      }|j                  dd      j                  |	d|j                  |z        }|j                  |j                        }|d d d |j                  d   f   |d d |j                  d   d f   }} |j                  d   |      } |j                  d   |      }|j                   s|j#                  |      }|dk(  r$|j                  dd      j                  |	
      }|dk(  r$|j                  dd      j                  |	
      }||fS )Nr  r6   r	   r   rG   r   r   r	  Fr  r   )r   r   r  r   r  r   splitr   r  r   rq   rM   r   r   r   rj   rV   rm   )rt   rr  r   r   r   r  rk  r  r  r   r  r  r  r  qkv
split_sizer   r   r
  encoder_qkvr  r  r  rQ   r  s                            rw   r   z#FusedJointAttnProcessor2_0.__call__*  sM    !"''
?1>1D1D.J)..z7FUNS]]^_abcM277"1F1L1L.J$9$>$>z7TZ]bTb$c$m$mnoqr$s!*003
 kk-(YYr]a'
!KKZR@sE ''(=> &&r*a/

 KKZR8		
,*, 		5"BCKii<=1E		5"BCKIIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP66uc5TWchi%//15==j"djj[cNcd%((5 !0x~~a0001!X^^A.001 - 'A}5&A}5$$$(OO4I$J!?)33B;CCJPWY_afgM"$9$C$CB$K$S$ST^`gioqv$w!333rx   r+  r  r  rx   rw   r  r  #  sk    cr 486:E4E4 ((E4  %00	E4
 !!2!23E4 
		E4rx   r  c                       e Zd ZdZddee   fdZ	 	 ddedej                  dej                  deej                     d	ej                  f
d
Z
y)r     
    Processor for implementing memory efficient attention using xFormers.

    Args:
        attention_op (`Callable`, *optional*, defaults to `None`):
            The base
            [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to
            use as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best
            operator.
    Nr   c                     || _         y r*  r   rt   r   s     rw   rO   z#XFormersJointAttnProcessor.__init__~  
    (rx   rr  r   r   r   r{   c                    |}|j                  |      }|j                  |      }	|j                  |      }
|j                  |      j	                         }|j                  |	      j	                         }	|j                  |
      j	                         }
|j
                  |j                  |      }|j                  |j                  |	      }	||j                  |      }|j                  |      }|j                  |      }|j                  |      j	                         }|j                  |      j	                         }|j                  |      j	                         }|j                  |j                  |      }|j                  |j                  |      }t        j                  ||gd      }t        j                  |	|gd      }	t        j                  |
|gd      }
t        j                  j!                  ||	|
|| j"                  |j$                        }|j'                  |j(                        }|j+                  |      }|M|d d d |j,                  d   f   |d d |j,                  d   d f   }}|j.                  s|j1                  |      } |j2                  d   |      } |j2                  d   |      }|||fS |S )Nr6   r   	attn_biasoprX   r   )rc   rd   re   r   
contiguousr^   r_   rh   rf   rg   rn   ro   r   r  r   r   r   r   rX   r   r   r   r   rV   rm   rj   )rt   rr  r   r   r   r  rk  r  r   r   r
  r  r  r  s                 rw   r   z#XFormersJointAttnProcessor.__call__  s    ! 		-(ii&		-(&&u-88:$$S)446&&u-88:;;"KK&E;;"++c"C !,/3?T/U,-1__=R-S*/3?T/U,/3/E/EFf/g/r/r/t,-1-C-CDb-c-n-n-p*/3/E/EFf/g/r/r/t,  ,373D3DEe3f0  ,151B1BCa1b.IIu&FGQOE))S"@AqICIIu&FGQOE ??3D<M<MUYU_U_ @ 
 &((5..}= , a!48>>!#4!445a!2!445 1M (((,8M(N% 'A}5&A}5 , "777  rx   r*  r+  )r   r,  r-  r.  r   r   rO   r   r   r  r   r  rx   rw   r   r   r  sx    	)Xh%7 ) 486:C!C! ((C!  %00	C!
 !!2!23C! 
		C!rx   r   c                       e Zd ZdZd Z	 	 	 	 ddedej                  deej                     deej                     deej                     d	eej                     d
ej                  fdZ	y)AllegroAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the Allegro model. It applies a normalization layer and rotary embedding on the query and key vector.
    c                 :    t        t        d      st        d      y )NrM   zWAllegroAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z AllegroAttnProcessor2_0.__init__  r  rx   Nrr  r   r   r   r  rs  r{   c                     |}|j                   |j                  ||      }|j                  }|dk(  r7|j                  \  }	}
}}|j                  |	|
||z        j	                  dd      }||j                  n|j                  \  }	}}|>|j                  |||	      }|j                  |	|j                  d|j                  d         }|j                  1|j                  |j	                  dd            j	                  dd      }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j                  |      }|j                  d   }||j                  z  }|j                  |	d|j                  |      j	                  dd      }|j                  |	d|j                  |      j	                  dd      }|j                  |	d|j                  |      j	                  dd      }|2|j                  s&ddlm}  |||d   |d         } |||d   |d         }t!        j"                  ||||dd	      }|j	                  dd      j%                  |	d|j                  |z        }|j'                  |j(                        } |j*                  d   |      } |j*                  d   |      }|dk(  r$|j	                  dd
      j%                  |	
      }|j,                  r||z   }||j.                  z  }|S )Nr  r6   r	   rG   )apply_rotary_emb_allegror   r	  Fr  r   )r]   r   r   r  r   r  r   rJ   rc   ra   r  rd   re   rT   
embeddingsr  rq   rM   r   r   r   rj   r-   r,   )rt   rr  r   r   r   r  rs  r  r  r   r  r  r  r  r   r   r   r
  rQ   r  r  s                        rw   r   z AllegroAttnProcessor2_0.__call__  s    !( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN??& OOM,C,CAq,IJTTUVXYZM		-( ($1!__$($C$CDY$Z!ii-.		/0IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP '0G0G<,U4DQ4GIYZ[I\]E*30@0CEUVWEXYC 6633RW
 &//15==j"djj[cNcd%((5 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   NNNNr  r  rx   rw   r  r    s    
 9=15'+37OO ||O  (5	O
 !.O u||$O #5<<0O 
Orx   r  c            	       n    e Zd ZdZd Z	 d	dedej                  dej                  dej                  fdZy)
AuraFlowAttnProcessor2_0z;Attention processor used typically in processing Aura Flow.c                 T    t        t        d      st        dd      rt        d      y y )NrM   r}   2.1zAuraFlowAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to at least 2.1 or above as we use `scale` in `F.scaled_dot_product_attention()`. rp   rq   r   ro  rp  s    rw   rO   z!AuraFlowAttnProcessor2_0.__init__(  s4    q89>NsTY>Z v  ?[9rx   Nrr  r   r   r{   c                 P   |j                   d   }|j                  |      }|j                  |      }|j                  |      }	|3|j	                  |      }
|j                  |      }|j                  |      }|j                   d   }||j                  z  }|j                  |d|j                  |      }|j                  |d|j                  |      }|	j                  |d|j                  |      }	|j                  |j                  |      }|j                  |j                  |      }|
j                  |d|j                  |      }
j                  |d|j                  |      }j                  |d|j                  |      }|j                  |j                  |
      }
|j                  |j                  |      }t        j                  |
|gd      }t        j                  ||gd      }t        j                  ||	gd      }	|j                  dd      }|j                  dd      }|	j                  dd      }	t!        j"                  |||	d|j$                  d      }|j                  dd      j'                  |d|j                  |z        }|j)                  |j*                        }|0|d d |j                   d   d f   |d d d |j                   d   f   }} |j,                  d   |      } |j,                  d   |      }||j/                  |      }|||fS |S )	Nr   rG   r6   r   r	   r	  Fr  rX   r4   )r   rc   rd   re   rh   rf   rg   r   r  r^   r_   rn   ro   r   r  r   rq   rM   rX   r   r   r   rj   rm   )rt   rr  r   r   r  rk  r   r   r   r
  r  r  r  rQ   r  s                  rw   r   z!AuraFlowAttnProcessor2_0.__call__.  s-    #((+
 		-(ii&		-( !,/3?T/U,-1__=R-S*/3?T/U, IIbM	

*

:r4::x@hhz2tzz8<

:r4::x@ ;;"KK&E;;"++c"C !,/O/T/TB

H0, .L-P-PQ[]_aeakakmu-v*/O/T/TB

H0,   ,373D3DEe3f0  ,151B1BCa1b.II?GQOE));SAqICII?GQOE1%mmAq!1% 663DJJ%
 &//15==j"djj[cNcd%((5 !,a!6!<!<Q!?!AABa!A#8#>#>q#A!AAB 1M 'A}5&A}5 ,$(OO4I$J! , "777  rx   r*  	r   r,  r-  r.  rO   r   r   r  r   r  rx   rw   r  r  %  sQ    E 48	Q!Q! ((Q!  %00	Q! 
		Q!rx   r  c            	       n    e Zd ZdZd Z	 d	dedej                  dej                  dej                  fdZy)
FusedAuraFlowAttnProcessor2_0zRAttention processor used typically in processing Aura Flow with fused projections.c                 T    t        t        d      st        dd      rt        d      y y )NrM   r}   r  zFusedAuraFlowAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to at least 2.1 or above as we use `scale` in `F.scaled_dot_product_attention()`. r  rp  s    rw   rO   z&FusedAuraFlowAttnProcessor2_0.__init__  s4    q89>NsTY>Z {  ?[9rx   Nrr  r   r   r{   c                    |j                   d   }|j                  |      }|j                   d   dz  }t        j                  ||d      \  }	}
}|?|j	                  |      }|j                   d   dz  }t        j                  ||d      \  }}}|
j                   d   }||j
                  z  }|	j                  |d|j
                  |      }	|
j                  |d|j
                  |      }
|j                  |d|j
                  |      }|j                  |j                  |	      }	|j                  |j                  |
      }
|j                  |d|j
                  |      }j                  |d|j
                  |      }j                  |d|j
                  |      }|j                  |j                  |      }|j                  |j                  |      }t        j                  ||	gd      }	t        j                  ||
gd      }
t        j                  ||gd      }|	j                  dd      }	|
j                  dd      }
|j                  dd      }t        j                  |	|
|d|j                  d	      }|j                  dd      j!                  |d|j
                  |z        }|j#                  |	j$                        }|0|d d |j                   d   d f   |d d d |j                   d   f   }} |j&                  d   |      } |j&                  d   |      }||j)                  |      }|||fS |S )
Nr   rG   r   r   r6   r	   r	  Fr  )r   r  r   r  r   r   r  r^   r_   rn   ro   r  r   rq   rM   rX   r   r   r   rj   rm   )rt   rr  r   r   r  rk  r   r  r  r   r   r
  r  r  r  r  rQ   r  s                     rw   r   z&FusedAuraFlowAttnProcessor2_0.__call__  sR    #((+
 kk-(YYr]a'
!KKZR@sE !,++,ABK$**2.!3J
 K<	0.0 IIbM	

*

:r4::x@hhz2tzz8<

:r4::x@ ;;"KK&E;;"++c"C !,/O/T/TB

H0, .L-P-PQ[]_aeakakmu-v*/O/T/TB

H0,   ,373D3DEe3f0  ,151B1BCa1b.II?GQOE));SAqICII?GQOE1%mmAq!1% 663DJJ%
 &//15==j"djj[cNcd%((5 !,a!6!<!<Q!?!AABa!A#8#>#>q#A!AAB 1M 'A}5&A}5 ,$(OO4I$J! , "777  rx   r*  r  r  rx   rw   r  r    sQ    \ 48	U!U! ((U!  %00	U! 
		U!rx   r  c                       e Zd ZdZd Z	 	 ddedej                  dej                  deej                     deej                     d	ej                  fd
Z	y)CogVideoXAttnProcessor2_0
    Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on
    query and key vectors, but does not include spatial normalization.
    c                 :    t        t        d      st        d      y NrM   zVCogVideoXAttnProcessor requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z"CogVideoXAttnProcessor2_0.__init__      q89vww :rx   Nrr  r   r   r   rs  r{   c                    |j                  d      }t        j                  ||gd      }|j                  \  }}}	|>|j	                  |||      }|j                  ||j                  d|j                  d         }|j                  |      }
|j                  |      }|j                  |      }|j                  d   }||j                  z  }|
j                  |d|j                  |      j                  dd      }
|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |j                  |
      }
|j                  |j                  |      }|Tddlm}  ||
d d d d |d f   |      |
d d d d |d f<   |j                  s! ||d d d d |d f   |      |d d d d |d f<   t!        j"                  |
|||dd      }|j                  dd      j%                  |d|j                  |z        } |j&                  d	   |      } |j&                  d   |      }|j)                  ||j                  d      |z
  gd      \  }}||fS )
Nr6   r   rG   r	   r  r	  Fr  r   )r  r   r  r   r  r  r   rc   rd   re   r   r^   r_   r	  r  rT   rq   rM   r   rj   r  )rt   rr  r   r   r   rs  text_seq_lengthr   r  r   r   r   r
  rQ   r  r  s                   rw   r   z"CogVideoXAttnProcessor2_0.__call__  s    044Q7		#8-"HaP)6)<)<&
OQ%!88ZdeN+00TZZ^MaMabdMefN		-(ii&		-(IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP;;"KK&E;;"++c"C '4,<U1aIYCY=Z\l,mE!Q(()**.>s1aIYCY?Z\l.mAq/**+6633RW
 &//15==j"djj[cNcd 'A}5&A}5/</B/Bm003oEFA 0C 0
,} 333rx   r+  r  r  rx   rw   r  r    sr    
x 26378484 ||84  %||	84
 !.84 #5<<084 
84rx   r  c                       e Zd ZdZd Z	 	 ddedej                  dej                  deej                     deej                     d	ej                  fd
Z	y)FusedCogVideoXAttnProcessor2_0r  c                 :    t        t        d      st        d      y r  rn  rp  s    rw   rO   z'FusedCogVideoXAttnProcessor2_0.__init__.	  r  rx   Nrr  r   r   r   rs  r{   c                 F   |j                  d      }t        j                  ||gd      }||j                  n|j                  \  }}}	|>|j	                  |||      }|j                  ||j                  d|j                  d         }|j                  |      }
|
j                  d   dz  }t        j                  |
|d      \  }}}|j                  d   }||j                  z  }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |j                  |      }|j                  |j                  |      }|Tddlm}  ||d d d d |d f   |      |d d d d |d f<   |j                  s! ||d d d d |d f   |      |d d d d |d f<   t        j                   ||||dd	      }|j                  dd      j#                  |d|j                  |z        } |j$                  d
   |      } |j$                  d   |      }|j                  ||j                  d      |z
  gd      \  }}||fS )Nr6   r   rG   r   r	   r  r	  Fr  r   )r  r   r  r   r  r  r   r  r  r   r^   r_   r	  r  rT   rq   rM   r   rj   )rt   rr  r   r   r   rs  r  r   r  r   r  r  r   r   r
  rQ   r  r  s                     rw   r   z'FusedCogVideoXAttnProcessor2_0.__call__2	  s    044Q7		#8-"HaP $9#@MF[FaFa 	'
OQ %!88ZdeN+00TZZ^MaMabdMefNkk-(YYr]a'
!KKZR@sEIIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP;;"KK&E;;"++c"C '4,<U1aIYCY=Z\l,mE!Q(()**.>s1aIYCY?Z\l.mAq/**+6633RW
 &//15==j"djj[cNcd 'A}5&A}5/</B/Bm003oEFA 0C 0
,} 333rx   r+  r  r  rx   rw   r!  r!  (	  sr    
x 2637:4:4 ||:4  %||	:4
 !.:4 #5<<0:4 
:4rx   r!  c                       e Zd ZdZddee   fdZ	 	 ddedej                  deej                     deej                     d	ej                  f
d
Z
y)r   r  Nr   c                     || _         y r*  r   r  s     rw   rO   z%XFormersAttnAddedKVProcessor.__init__{	  r  rx   rr  r   r   r   r{   c                    |}|j                  |j                  d   |j                  d   d      j                  dd      }|j                  \  }}}|j                  |||      }||}n|j                  r|j                  |      }|j                  |j                  dd            j                  dd      }|j                  |      }	|j                  |	      }	|j                  |      }
|j                  |      }|j                  |
      }
|j                  |      }|j                  sw|j                  |      }|j                  |      }|j                  |      }|j                  |      }t        j                  |
|gd      }t        j                  ||gd      }n|
}|}t         j"                  j%                  |	|||| j&                  |j(                        }|j+                  |	j,                        }|j/                  |      } |j0                  d   |      } |j0                  d   |      }|j                  dd      j3                  |j                        }||z   }|S )Nr   r6   rG   r	   r   r   r   )r  r   r   r  ra   r  rJ   rc   r   rf   rg   r*   rd   re   r   r  r   r   r   r   rX   r   r   r   rj   r   )rt   rr  r   r   r   r  r   r  r   r   r  r  r   r
  s                 rw   r   z%XFormersAttnAddedKVProcessor.__call__~	  sL    !%**=+>+>q+A=CVCVWXCY[]^hhijlmn)6)<)<&
OQ44^_V`a ($1!__$($C$CDY$Z!(?(?1(EFPPQRTUV		-(&&u-)-9N)O&+/??;P+Q()-)?)?@^)_&+/+A+ABb+c((())M*CIIm,E((-C**51E));SAqICII?GQOE0C4E ??3D<M<MUYU_U_ @ 
 &((5..}= 'A}5&A}5%//B7??O%0rx   r*  r+  r   r,  r-  r.  r   r   rO   r   r   r2  r   r  rx   rw   r   r   o	  so    	)Xh%7 ) 9=1555 ||5  (5	5
 !.5 
5rx   r   c                       e Zd ZdZddee   fdZ	 	 	 ddedej                  deej                     deej                     d	eej                     d
ej                  fdZ
y)r   r  Nr   c                     || _         y r*  r   r  s     rw   rO   zXFormersAttnProcessor.__init__	  r  rx   rr  r   r   r   r  r{   c                 n   t        |      dkD  s|j                  dd       d}t        dd|       |}	|j                  |j                  ||      }|j                  }
|
dk(  r7|j
                  \  }}}}|j                  ||||z        j                  dd      }||j
                  n|j
                  \  }}}|j                  |||      }|#|j
                  \  }}}|j                  d|d      }|j                  1|j                  |j                  dd            j                  dd      }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j                  |      }|j!                  |      j#                         }|j!                  |      j#                         }|j!                  |      j#                         }t$        j&                  j)                  ||||| j*                  |j,                  	      }|j/                  |j0                        }|j3                  |      } |j4                  d   |      } |j4                  d   |      }|
dk(  r$|j                  dd
      j7                  |      }|j8                  r||	z   }||j:                  z  }|S )Nr   rX   r  r  r  r6   r	   rG   r   r   )r   r  r   r]   r   r   r  r   r  expandrJ   rc   ra   r  rd   re   r   r  r   r   r   r   rX   r   r   r   rj   r   r-   r,   )rt   rr  r   r   r   r  r  rk  r  r  r  r   r  r  r  
key_tokensr   query_tokensr   r   r
  s                        rw   r   zXFormersAttnProcessor.__call__	  s    t9q=FJJw5A #Ugw(;< ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	"
J 44^ZQ[\% "/!4!4A|Q+222|RHN??& OOM,C,CAq,IJTTUVXYZM		-( ($1!__$($C$CDY$Z!ii-.		/0&&u-88:$$S)446&&u-88: ??3D<M<MUYU_U_ @ 
 &((5..}= 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   r*  r  r'  r  rx   rw   r   r   	  s    	)Xh%7 ) 9=15'+LL ||L  (5	L
 !.L u||$L 
Lrx   r   c                       e Zd ZdZd Z	 	 	 ddedej                  deej                     deej                     deej                     d	ej                  fd
Z	y)r   a  
    Processor for implementing flash attention using torch_npu. Torch_npu supports only fp16 and bf16 data types. If
    fp32 is used, F.scaled_dot_product_attention will be used for computation, but the acceleration effect on NPU is
    not significant.

    c                 .    t               st        d      y )NzTAttnProcessorNPU requires torch_npu extensions and is supported only on npu devices.)r   ro  rp  s    rw   rO   zAttnProcessorNPU.__init__
  s    %'tuu (rx   Nrr  r   r   r   r  r{   c                    t        |      dkD  s|j                  dd       d}t        dd|       |}	|j                  |j                  ||      }|j                  }
|
dk(  r7|j
                  \  }}}}|j                  ||||z        j                  dd      }||j
                  n|j
                  \  }}}||j                  |||      }|j                  ||j                  d|j
                  d         }|j                  dd|j
                  d   d      }|j                  t        j                  k(  r$t        j                  |j                               }n|j                         }|j                  1|j                  |j                  dd            j                  dd      }|j!                  |      }||}n|j"                  r|j%                  |      }|j'                  |      }|j)                  |      }|j
                  d   }||j                  z  }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  t        j*                  t        j,                  fv rSt/        j0                  ||||j                  d	d |d
t3        j4                  |j
                  d         z  ddd
dd      d   }nt7        j8                  ||||dd      }|j                  dd      j;                  |d|j                  |z        }|j=                  |j                        } |j>                  d   |      } |j>                  d   |      }|
dk(  r$|j                  dd      j;                  |      }|j@                  r||	z   }||jB                  z  }|S )Nr   rX   r  r  r  r6   r	   rG   BNSDr:   i   F)	input_layoutpse
atten_maskrX   pre_tockensnext_tockens	keep_probsyncinner_preciser	  r  r   )"r   r  r   r]   r   r   r  r   r  r   repeatr   r   r0  logical_notrJ   rc   ra   r  rd   re   float16bfloat16	torch_npunpu_fusion_attentionmathsqrtrq   rM   r   r   rj   r-   r,   rt   rr  r   r   r   r  r  rk  r  r  r  r   r  r  r  r  r   r   r   r
  rQ   r  s                         rw   r   zAttnProcessorNPU.__call__ 
  s    t9q=FJJw5A #Ugw(;< ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN+221a9L9LQ9OQRSN##uzz1!&!2!2>3F3F3H!I!/!4!4!6??& OOM,C,CAq,IJTTUVXYZM		-( ($1!__$($C$CDY$Z!ii-.		/0IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP ;;5==%..99%::

#)DIIekk"o66!" M" ::sE^sV[M &//15==j"djj[cNcd%((5 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   r  r  r  rx   rw   r   r   
  s    v 9=15'+cc ||c  (5	c
 !.c u||$c 
crx   r   c                       e Zd ZdZd Z	 	 	 ddedej                  deej                     deej                     deej                     d	ej                  fd
Z	y)rr   s
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
    c                 :    t        t        d      st        d      y r  rn  rp  s    rw   rO   zAttnProcessor2_0.__init__
  r  rx   Nrr  r   r   r   r  r{   c                    t        |      dkD  s|j                  dd       d}t        dd|       |}	|j                  |j                  ||      }|j                  }
|
dk(  r7|j
                  \  }}}}|j                  ||||z        j                  dd      }||j
                  n|j
                  \  }}}|>|j                  |||      }|j                  ||j                  d|j
                  d         }|j                  1|j                  |j                  dd            j                  dd      }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j                  |      }|j
                  d   }||j                  z  }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                   |j!                  |      }|j"                  |j#                  |      }t%        j&                  ||||d	d
      }|j                  dd      j)                  |d|j                  |z        }|j+                  |j,                        } |j.                  d   |      } |j.                  d   |      }|
dk(  r$|j                  dd      j)                  |      }|j0                  r||	z   }||j2                  z  }|S )Nr   rX   r  r  r  r6   r	   rG   r	  Fr  r   )r   r  r   r]   r   r   r  r   r  r   rJ   rc   ra   r  rd   re   r^   r_   rq   rM   r   r   r   rj   r-   r,   rB  s                         rw   r   zAttnProcessor2_0.__call__
  s.    t9q=FJJw5A #Ugw(;< ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN??& OOM,C,CAq,IJTTUVXYZM		-( ($1!__$($C$CDY$Z!ii-.		/0IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP;;"KK&E;;"++c"C 6633RW
 &//15==j"djj[cNcd%((5 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   r  r  r  rx   rw   rr   rr   
  s    r 9=15'+RR ||R  (5	R
 !.R u||$R 
Rrx   rr   c                       e Zd ZdZddeeee   df      fdZ	 	 	 ddede	j                  dee	j                     d	ee	j                     d
ee	j                     de	j                  fdZy)r   z
    Processor for implementing scaled dot-product attention with pallas flash attention kernel if using `torch_xla`.
    Nrz   .c                     t        t        d      st        d      t        dd      rt        d      t	               rt        dd      rt        d      || _        y )NrM   zXXLAFlashAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r}   r~   6XLA flash attention requires torch_xla version >= 2.3.r   DSPMD support for XLA flash attention needs torch_xla version >= 2.4.)rp   rq   ro  r   r   rz   )rt   rz   s     rw   rO   z!XLAFlashAttnProcessor2_0.__init__
  sX    q89j   U+VWW9-c59dee,rx   rr  r   r   r   r  r{   c                    |}|j                   |j                  ||      }|j                  }	|	dk(  r7|j                  \  }
}}}|j                  |
|||z        j	                  dd      }||j                  n|j                  \  }
}}|>|j                  |||
      }|j                  |
|j                  d|j                  d         }|j                  1|j                  |j	                  dd            j	                  dd      }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j                  |      }|j                  d   }||j                  z  }|j                  |
d|j                  |      j	                  dd      }|j                  |
d|j                  |      j	                  dd      }|j                  |
d|j                  |      j	                  dd      }|j                  |j                  |      }|j                  |j                  |      }t        d |||fD              r|n|j                  |
dd|j                  d         }|j!                         j#                  |dk(  t!        d            j#                  |dk(  t!        d            }||z   }|t%        j&                  |j                  d	         z  }t)               r| j*                  nd }t-        |||d
|      }n0t.        j1                  d       t3        j4                  ||||dd
      }|j	                  dd      j7                  |
d|j                  |z        }|j9                  |j:                        } |j<                  d   |      } |j<                  d   |      }|	dk(  r$|j	                  dd      j7                  |
      }|j>                  r||z   }||j@                  z  }|S )Nr  r6   r	   rG   c              3   @   K   | ]  }|j                   d    dk\    yw)r	   i   N)r   ).0r   s     rw   	<genexpr>z4XLAFlashAttnProcessor2_0.__call__.<locals>.<genexpr>.  s     I6v||A$&Is   r   r  r	  r   F)causalrz   z[Unable to use the flash attention pallas kernel API call due to QKV sequence length < 4096.r  r   )!r]   r   r   r  r   r  r   rJ   rc   ra   r  rd   re   r^   r_   allr   masked_fillr@  rA  r   rz   r   r   r   rq   rM   r   r   r   rj   r-   r,   )rt   rr  r   r   r   r  r  rk  r  r  r   r  r  r  r  r   r   r   r
  rQ   r  rz   s                         rw   r   z!XLAFlashAttnProcessor2_0.__call__
  s    !( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN??& OOM,C,CAq,IJTTUVXYZM		-( ($1!__$($C$CDY$Z!ii-.		/0IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP;;"KK&E;;"++c"C IeS%5HII)!/!4!4ZA~G[G[\^G_!` #((* [1!4eFmD [1!4eCjA  N*TYYu{{1~..E4;IT004N+E3e\jkMNNm ::sE^sV[M &//15==j"djj[cNcd%((5 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   r*  r  )r   r,  r-  r.  r   r   r1  rO   r   r   r2  r   r  rx   rw   r   r   
  s    	-xhsmS6H0I'J 	- 9=15'+aa ||a  (5	a
 !.a u||$a 
arx   r   c                       e Zd ZdZd Z	 	 d
dedej                  deej                     deej                     dej                  f
d	Z	y)MochiVaeAttnProcessor2_0z0
    Attention processor used in Mochi VAE.
    c                 :    t        t        d      st        d      y r  rn  rp  s    rw   rO   z!MochiVaeAttnProcessor2_0.__init__]  r  rx   Nrr  r   r   r   r{   c                 N   |}|j                   d   dk(  }||j                   n|j                   \  }}}	|>|j                  |||      }|j                  ||j                  d|j                   d         }|r]|j	                  |      } |j
                  d   |      } |j
                  d   |      }|j                  r||z   }||j                  z  }|S |j                  |      }
||}|j                  |      }|j	                  |      }|j                   d   }||j                  z  }|
j                  |d|j                  |      j                  dd      }
|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |j                  |
      }
|j                  |j                  |      }t        j                  |
|||d|j                        }|j                  dd      j!                  |d|j                  |z        }|j#                  |
j$                        } |j
                  d   |      } |j
                  d   |      }|j                  r||z   }||j                  z  }|S )Nr6   rG   r   r	   r	  r  )r   r  r  r   re   rj   r-   r,   rc   rd   r   r^   r_   rq   rM   r4   r   r   r   )rt   rr  r   r   r   r  is_single_framer   r  r   r   r   r
  rQ   r  s                  rw   r   z!MochiVaeAttnProcessor2_0.__call__a  s    !'--a0A5 $9#@MF[FaFa 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN IIm4M +DKKN=9M*DKKN=9M'' - 8)D,F,FFM  		-( ($1!ii-.		/0IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP;;"KK&E;;"++c"C 6633RVR`R`
 &//15==j"djj[cNcd%((5 'A}5&A}5##)H4M%(B(BBrx   r+  r  r  rx   rw   rT  rT  X  sj    r 9=15II ||I  (5	I
 !.I 
Irx   rT  c                   
   e Zd ZdZd Zdej                  deej                     dej                  fdZ	 	 	 dde	d	ej                  d
e
ej                     de
ej                     de
ej                     dej                  fdZy)StableAudioAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the Stable Audio model. It applies rotary embedding on query and key vector, and allows MHA, GQA or MQA.
    c                 :    t        t        d      st        d      y )NrM   z[StableAudioAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z$StableAudioAttnProcessor2_0.__init__  r  rx   rx  	freqs_cisr{   c                     ddl m} |d   j                  d   }|dd |f   |d|d f   }} |||dd      }t        j                  ||fd	      }|S )
Nr6   r  r   rG   .Tr   use_realuse_real_unbind_dimr   )r	  r  r   r   r  )	rt   rx  r[  r  rot_dimx_to_rotatex_unrotated	x_rotatedouts	            rw   apply_partial_rotary_embz4StableAudioAttnProcessor2_0.apply_partial_rotary_emb  sg    
 	1A,$$R(#$S(7(]#3QsGH}5E[$[)d`bc	iiK0b9
rx   Nrr  r   r   r   
rotary_embc                 h   ddl m} |}|j                  }|dk(  r7|j                  \  }	}
}}|j	                  |	|
||z        j                  dd      }||j                  n|j                  \  }	}}|>|j                  |||	      }|j	                  |	|j                  d|j                  d         }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j                  |      }|j                  d   |j                  z  }|j                  d   |z  }|j	                  |	d|j                  |      j                  dd      }|j	                  |	d||      j                  dd      }|j	                  |	d||      j                  dd      }||j                  k7  ra|j                  |z  }t        j                  ||d|j                  d   |z        }t        j                  ||d|j                  d   |z        }|j                  |j                  |      }|j                   |j!                  |      }||j"                  }|j"                  }|j%                  t        j&                        }|j%                  t        j&                        }|d   j                  d   }|dd |f   |d|d f   }} |||d	d
      }t        j(                  ||fd      }|j*                  s7|dd |f   |d|d f   }} |||d	d
      }t        j(                  ||fd      }|j%                  |      }|j%                  |      }t-        j.                  ||||dd      }|j                  dd      j1                  |	d|j                  |z        }|j%                  |j"                        } |j2                  d   |      } |j2                  d   |      }|dk(  r$|j                  dd
      j1                  |	
      }|j4                  r||z   }||j6                  z  }|S )Nr6   r  r  r	   rG   r  r   .Tr   r]  r   r	  Fr  )r	  r  r   r   r  r   r  r   rc   ra   r  rd   re   r   r  r^   r_   r   r   rY  r  rT   rq   rM   r   rj   r-   r,   )rt   rr  r   r   r   rf  r  r  r  r   r  r  r  r  r   r   r   r
  r  r   heads_per_kv_headquery_dtype	key_dtyper`  query_to_rotatequery_unrotatedquery_rotatedkey_to_rotatekey_unrotatedkey_rotateds                                 rw   r   z$StableAudioAttnProcessor2_0.__call__  s+    	1 "''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN		-( ($1!__$($C$CDY$Z!ii-.		/0;;r?djj099R=H,

:r4::x@JJ1aPhhz2x:DDQJ

:r8X>HHANtzz! $

h 6))#/@aUXU^U^_`UaduUuvC++(aU[[^N_=_E ;;"KK&E;;"++c"C !++K		IHHU]]+E&&'C m))"-G/4S(7(]/CU3PWPX=EY_O,_jSWmopMII}o>BGE**/23=/A3sGH}CU}.}jSWmopiim <"EHH[)E&&#C 6633RW
 &//15==j"djj[cNcd%((5 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   r  )r   r,  r-  r.  rO   r   r2  r   re  r   r   r   r  rx   rw   rY  rY    s    
<< & 
	& 9=15-1gg ||g  (5	g
 !.g U\\*g 
grx   rY  c                       e Zd ZdZd Z	 	 	 	 ddedej                  deej                     deej                     deej                     d	eej                     d
ej                  fdZ	y)HunyuanAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the HunyuanDiT model. It applies a s normalization layer and rotary embedding on query and key vector.
    c                 :    t        t        d      st        d      y r  rn  rp  s    rw   rO   z HunyuanAttnProcessor2_0.__init__8  r  rx   Nrr  r   r   r   r  rs  r{   c                 x   ddl m} |}|j                  |j                  ||      }|j                  }	|	dk(  r7|j                  \  }
}}}|j                  |
|||z        j                  dd      }||j                  n|j                  \  }
}}|>|j                  |||
      }|j                  |
|j                  d|j                  d         }|j                  1|j                  |j                  dd            j                  dd      }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j                  |      }|j                  d   }||j                  z  }|j                  |
d|j                  |      j                  dd      }|j                  |
d|j                  |      j                  dd      }|j                  |
d|j                  |      j                  dd      }|j                  |j                  |      }|j                   |j!                  |      }| |||      }|j"                  s	 |||      }t%        j&                  ||||dd      }|j                  dd      j)                  |
d|j                  |z        }|j+                  |j,                        } |j.                  d	   |      } |j.                  d   |      }|	dk(  r$|j                  dd
      j)                  |
      }|j0                  r||z   }||j2                  z  }|S Nr6   r  r  r	   rG   r	  Fr  r   r   )r	  r  r]   r   r   r  r   r  r   rJ   rc   ra   r  rd   re   r^   r_   rT   rq   rM   r   r   r   rj   r-   r,   )rt   rr  r   r   r   r  rs  r  r  r  r   r  r  r  r  r   r   r   r
  rQ   r  s                        rw   r   z HunyuanAttnProcessor2_0.__call__<  s+    	1 ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN??& OOM,C,CAq,IJTTUVXYZM		-( ($1!__$($C$CDY$Z!ii-.		/0IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP;;"KK&E;;"++c"C '$U,<=E**&s,<= 6633RW
 &//15==j"djj[cNcd%((5 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   r
  r  r  rx   rw   rr  rr  2  s    
r 9=15'+37UU ||U  (5	U
 !.U u||$U #5<<0U 
Urx   rr  c                       e Zd ZdZd Z	 	 	 	 ddedej                  deej                     deej                     deej                     d	eej                     d
ej                  fdZ	y)FusedHunyuanAttnProcessor2_0a  
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0) with fused
    projection layers. This is used in the HunyuanDiT model. It applies a s normalization layer and rotary embedding on
    query and key vector.
    c                 :    t        t        d      st        d      y )NrM   z\FusedHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z%FusedHunyuanAttnProcessor2_0.__init__  s"    q89n  :rx   Nrr  r   r   r   r  rs  r{   c                 *   ddl m} |}|j                  |j                  ||      }|j                  }	|	dk(  r7|j                  \  }
}}}|j                  |
|||z        j                  dd      }||j                  n|j                  \  }
}}|>|j                  |||
      }|j                  |
|j                  d|j                  d         }|j                  1|j                  |j                  dd            j                  dd      }|@|j                  |      }|j                  d   dz  }t        j                  ||d      \  }}}nl|j                  r|j                  |      }|j                  |      }|j!                  |      }|j                  d   dz  }t        j                  ||d      \  }}|j                  d   }||j                  z  }|j                  |
d|j                  |      j                  dd      }|j                  |
d|j                  |      j                  dd      }|j                  |
d|j                  |      j                  dd      }|j"                  |j#                  |      }|j$                  |j%                  |      }| |||      }|j&                  s	 |||      }t)        j*                  ||||dd	
      }|j                  dd      j-                  |
d|j                  |z        }|j/                  |j0                        } |j2                  d   |      } |j2                  d   |      }|	dk(  r$|j                  dd      j-                  |
      }|j4                  r||z   }||j6                  z  }|S )Nr6   r  r  r	   rG   r   r   r	  Fr  r   r   )r	  r  r]   r   r   r  r   r  r   rJ   r  r   r  ra   r  rc   r  r^   r_   rT   rq   rM   r   r   r   rj   r-   r,   )rt   rr  r   r   r   r  rs  r  r  r  r   r  r  r  r  r   r  r  r   r   r
  kvrQ   r  s                           rw   r   z%FusedHunyuanAttnProcessor2_0.__call__  sw    	1 ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN??& OOM,C,CAq,IJTTUVXYZM (++m,C2!+J %C DE3(,(G(GH](^%IIm,E12B"*JR<JCIIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP;;"KK&E;;"++c"C '$U,<=E**&s,<= 6633RW
 &//15==j"djj[cNcd%((5 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   r
  r  r  rx   rw   rw  rw    s     9=15'+37WW ||W  (5	W
 !.W u||$W #5<<0W 
Wrx   rw  c                       e Zd ZdZd Z	 	 	 	 ddedej                  deej                     deej                     deej                     d	eej                     d
ej                  fdZ	y)PAGHunyuanAttnProcessor2_0_  
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the HunyuanDiT model. It applies a normalization layer and rotary embedding on query and key vector. This
    variant of the processor employs [Pertubed Attention Guidance](https://huggingface.co/papers/2403.17377).
    c                 :    t        t        d      st        d      y )NrM   zZPAGHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z#PAGHunyuanAttnProcessor2_0.__init__  s"    q89l  :rx   Nrr  r   r   r   r  rs  r{   c                 F   ddl m} |}|j                  |j                  ||      }|j                  }	|	dk(  r7|j                  \  }
}}}|j                  |
|||z        j                  dd      }|j                  d      \  }}||j                  n|j                  \  }
}}|>|j                  |||
      }|j                  |
|j                  d|j                  d         }|j                  1|j                  |j                  dd            j                  dd      }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j                  |      }|j                  d   }||j                  z  }|j                  |
d|j                  |      j                  dd      }|j                  |
d|j                  |      j                  dd      }|j                  |
d|j                  |      j                  dd      }|j                   |j!                  |      }|j"                  |j#                  |      }| |||      }|j$                  s	 |||      }t'        j(                  ||||dd      }|j                  dd      j+                  |
d|j                  |z        }|j-                  |j.                        } |j0                  d	   |      } |j0                  d   |      }|	dk(  r$|j                  dd
      j+                  |
      }|j                  1|j                  |j                  dd            j                  dd      }|j                  |      }|j-                  |j.                        } |j0                  d	   |      } |j0                  d   |      }|	dk(  r$|j                  dd
      j+                  |
      }t3        j4                  ||g      }|j6                  r||z   }||j8                  z  }|S ru  )r	  r  r]   r   r   r  r   r  r  r   rJ   rc   ra   r  rd   re   r^   r_   rT   rq   rM   r   r   r   rj   r   r  r-   r,   )rt   rr  r   r   r   r  rs  r  r  r  r   r  r  r  r  r  r  r   r   r   r
  rQ   r  s                          rw   r   z#PAGHunyuanAttnProcessor2_0.__call__  s$    	1 ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM 0=/B/B1/E,, (='D##J_JeJe 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN??& $0A0K0KAq0Q R \ \]^`a b		+, ($5!__$($C$CDY$Z!ii-.		/0IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP;;"KK&E;;"++c"C '$U,<=E**&s,<= ::33RW
 .771=EEjRTVZV`V`ckVkl-00= +DKKN+<=*DKKN+<=? 1 ; ;B C K KJX_agin o ??& $0A0K0KAq0Q R \ \]^`a b II&78-00= +DKKN+<=*DKKN+<=? 1 ; ;B C K KJX_agin o 		#46G"HI##)H4M%(B(BBrx   r
  r  r  rx   rw   r|  r|    s     9=15'+37kk ||k  (5	k
 !.k u||$k #5<<0k 
krx   r|  c                       e Zd ZdZd Z	 	 	 	 ddedej                  deej                     deej                     deej                     d	eej                     d
ej                  fdZ	y)PAGCFGHunyuanAttnProcessor2_0r}  c                 :    t        t        d      st        d      y )NrM   z]PAGCFGHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z&PAGCFGHunyuanAttnProcessor2_0.__init__}  s"    q89o  :rx   Nrr  r   r   r   r  rs  r{   c                 v   ddl m} |}|j                  |j                  ||      }|j                  }	|	dk(  r7|j                  \  }
}}}|j                  |
|||z        j                  dd      }|j                  d      \  }}}t        j                  ||g      }||j                  n|j                  \  }
}}|>|j                  |||
      }|j                  |
|j                  d|j                  d         }|j                  1|j                  |j                  dd            j                  dd      }|j                  |      }||}n|j                  r|j                  |      }|j!                  |      }|j#                  |      }|j                  d   }||j                  z  }|j                  |
d|j                  |      j                  dd      }|j                  |
d|j                  |      j                  dd      }|j                  |
d|j                  |      j                  dd      }|j$                  |j%                  |      }|j&                  |j'                  |      }| |||      }|j(                  s	 |||      }t+        j,                  ||||dd	      }|j                  dd      j/                  |
d|j                  |z        }|j1                  |j2                        } |j4                  d
   |      } |j4                  d   |      }|	dk(  r$|j                  dd      j/                  |
      }|j                  1|j                  |j                  dd            j                  dd      }|j#                  |      }|j1                  |j2                        } |j4                  d
   |      } |j4                  d   |      }|	dk(  r$|j                  dd      j/                  |
      }t        j                  ||g      }|j6                  r||z   }||j8                  z  }|S )Nr6   r  r  r	   r   rG   r	  Fr  r   r   )r	  r  r]   r   r   r  r   r  r   r  r  r   rJ   rc   ra   r  rd   re   r^   r_   rT   rq   rM   r   r   r   rj   r-   r,   )rt   rr  r   r   r   r  rs  r  r  r  r   r  r  r  r  r  r  r  r   r   r   r
  rQ   r  s                           rw   r   z&PAGCFGHunyuanAttnProcessor2_0.__call__  s@    	1 ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM FSEXEXYZE[B/1B!II';=N&OP (='D##J_JeJe 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN??& $0A0K0KAq0Q R \ \]^`a b		+, ($5!__$($C$CDY$Z!ii-.		/0IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP;;"KK&E;;"++c"C '$U,<=E**&s,<= ::33RW
 .771=EEjRTVZV`V`ckVkl-00= +DKKN+<=*DKKN+<=? 1 ; ;B C K KJX_agin o ??& $0A0K0KAq0Q R \ \]^`a b II&78-00= +DKKN+<=*DKKN+<=? 1 ; ;B C K KJX_agin o 		#46G"HI##)H4M%(B(BBrx   r
  r  r  rx   rw   r  r  v  s     9=15'+37ll ||l  (5	l
 !.l u||$l #5<<0l 
lrx   r  c                       e Zd ZdZd Z	 	 	 	 ddedej                  dej                  deej                     deej                     d	eej                     d
ee	   dej                  fdZ
y)LuminaAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the LuminaNextDiT model. It applies a s normalization layer and rotary embedding on query and key vector.
    c                 :    t        t        d      st        d      y r  rn  rp  s    rw   rO   zLuminaAttnProcessor2_0.__init__  r  rx   Nrr  r   r   r   query_rotary_embkey_rotary_embbase_sequence_lengthr{   c                 
   ddl m} |j                  }	|	dk(  r7|j                  \  }
}}}|j	                  |
|||z        j                  dd      }|j                  \  }
}}|j                  |      }|j                  |      }|j                  |      }|j                  d   }|j                  d   }||j                  z  }|j                  }||z  }|j                  |j                  |      }|j                  |j                  |      }|j	                  |
d|j                  |      }|j	                  |
d||      }|j	                  |
d||      }| |||d      }| |||d      }|j                  |      |j                  |      }}|d }nE|7t        j                  t        j                   ||            |j"                  z  }n|j"                  }|j                  |z  }|dk\  rh|j%                  d      j'                  ddd|d      j)                  dd      }|j%                  d      j'                  ddd|d      j)                  dd      }|j+                         j	                  |
ddd      }|j-                  d|j                  |d      }|j                  dd      }|j                  dd      }|j                  dd      }t/        j0                  |||||	      }|j                  dd      j                  |      }|S )
Nr6   r  r  r	   rG   F)r^  r   )r  rX   )r	  r  r   r   r  r   rc   rd   re   r   r   r^   r_   r   r@  rA  logrX   r  r:  rw  r0  r+  rq   rM   )rt   rr  r   r   r   r  r  r  r  r  r   r  r  r  r  r   r   r   r
  r   rQ   r  r   r   softmax_scalen_reps                             rw   r   zLuminaAttnProcessor2_0.__call__  s    	1"''
?1>1D1D.J)..z7FUNS]]^_abcM)6)<)<&
OQ 		-(ii-.		/0KKO	IIbM	

* ( ;;"KK&E;;"++c"C

:r4::x@hhz2x:

:r8X> '$U,<uME%"3GCXXe_cffUms ! M#/ $		$((?DX*Y Z]a]g]g g $

 

h&A:--"))!Q5!<DDQJCOOA&--aAua@HHANE (,,.33J1bI'..r4::PRS1%mmAq!1% 663}
 &//1588?rx   r
  )r   r,  r-  r.  rO   r   r   r2  r   r/  r   r  rx   rw   r  r    s    
r 263715.2SS ||S  %||	S
 !.S #5<<0S !.S 'smS 
Srx   r  c                       e Zd ZdZd Z	 	 	 ddedej                  deej                     deej                     deej                     d	ej                  fd
Z	y)FusedAttnProcessor2_0u  
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). It uses
    fused projection layers. For self-attention modules, all projection matrices (i.e., query, key, value) are fused.
    For cross-attention modules, key and value projection matrices are fused.

    <Tip warning={true}>

    This API is currently 🧪 experimental in nature and can change in future.

    </Tip>
    c                 :    t        t        d      st        d      y )NrM   z`FusedAttnProcessor2_0 requires at least PyTorch 2.0, to use it. Please upgrade PyTorch to > 2.0.rn  rp  s    rw   rO   zFusedAttnProcessor2_0.__init___  s"    q89r  :rx   Nrr  r   r   r   r  r{   c                 <   t        |      dkD  s|j                  dd       d}t        dd|       |}	|j                  |j                  ||      }|j                  }
|
dk(  r7|j
                  \  }}}}|j                  ||||z        j                  dd      }||j
                  n|j
                  \  }}}|>|j                  |||      }|j                  ||j                  d|j
                  d         }|j                  1|j                  |j                  dd            j                  dd      }|@|j                  |      }|j
                  d   d	z  }t        j                  ||d
      \  }}}nl|j                  r|j                  |      }|j!                  |      }|j#                  |      }|j
                  d   dz  }t        j                  ||d
      \  }}|j
                  d   }||j                  z  }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j$                  |j%                  |      }|j&                  |j'                  |      }t)        j*                  ||||dd      }|j                  dd      j-                  |d|j                  |z        }|j/                  |j0                        } |j2                  d   |      } |j2                  d   |      }|
dk(  r$|j                  dd      j-                  |      }|j4                  r||	z   }||j6                  z  }|S )Nr   rX   r  r  r  r6   r	   rG   r   r   r	  Fr  r   )r   r  r   r]   r   r   r  r   r  r   rJ   r  r   r  ra   r  rc   r  r^   r_   rq   rM   r   r   r   rj   r-   r,   )rt   rr  r   r   r   r  r  rk  r  r  r  r   r  r  r  r  r   r  r  r   r   r
  rz  rQ   r  s                            rw   r   zFusedAttnProcessor2_0.__call__e  sz    t9q=FJJw5A #Ugw(;< ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN??& OOM,C,CAq,IJTTUVXYZM (++m,C2!+J %C DE3(,(G(GH](^%IIm,E12B"*JR<JCIIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP;;"KK&E;;"++c"C 6633RW
 &//15==j"djj[cNcd%((5 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   r  r  r  rx   rw   r  r  R  s    
 9=15'+TT ||T  (5	T
 !.T u||$T 
Trx   r  c                        e Zd ZdZ	 	 	 	 	 	 	 ddededee   dee   dededee   f fd	Z		 	 dd
e
dej                  deej                     deej                     dej                  f
dZ xZS )r   az  
    Processor for implementing memory efficient attention using xFormers for the Custom Diffusion method.

    Args:
    train_kv (`bool`, defaults to `True`):
        Whether to newly train the key and value matrices corresponding to the text features.
    train_q_out (`bool`, defaults to `True`):
        Whether to newly train query matrices corresponding to the latent image features.
    hidden_size (`int`, *optional*, defaults to `None`):
        The hidden size of the attention layer.
    cross_attention_dim (`int`, *optional*, defaults to `None`):
        The number of channels in the `encoder_hidden_states`.
    out_bias (`bool`, defaults to `True`):
        Whether to include the bias parameter in `train_q_out`.
    dropout (`float`, *optional*, defaults to 0.0):
        The dropout probability to use.
    attention_op (`Callable`, *optional*, defaults to `None`):
        The base
        [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to use
        as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best operator.
    r   r   r   r   r(   r   r   c                 J   t         |           || _        || _        || _        || _        || _        | j                  rBt        j                  |xs ||d      | _	        t        j                  |xs ||d      | _
        | j                  rt        j                  ||d      | _        t        j                  g       | _        | j                  j                  t        j                  |||             | j                  j                  t        j                  |             y y r  )rN   rO   r   r   r   r   r   r   rb   r   r  r  ri   r  rk   rl   )	rt   r   r   r   r   r(   r   r   rv   s	           rw   rO   z-CustomDiffusionXFormersAttnProcessor.__init__  s     	 &&#6 ( ==)+3F3U+Wbin)oD&)+3F3U+Wbin)oD&)+;RW)XD&+-==+<D(((//		+{Ya0bc((//

70CD	 rx   rr  r   r   r   r{   c                    ||j                   n|j                   \  }}}|j                  |||      }| j                  r?| j                  |      j	                  |j
                  j                  j                        }n>|j                  |j	                  |j
                  j                  j                              }|d}	|}nd}	|j                  r|j                  |      }| j                  r| j                  |j	                  | j                  j                  j                              }
| j                  |j	                  | j                  j                  j                              }|
j	                  |j
                  j                  j                        }
|j	                  |j
                  j                  j                        }n"|j                  |      }
|j                  |      }|	rjt        j                   |
      }|d d d dd d f   dz  |d d d dd d f<   ||
z  d|z
  |
j#                         z  z   }
||z  d|z
  |j#                         z  z   }|j%                  |      j'                         }|j%                  |
      j'                         }
|j%                  |      j'                         }t(        j*                  j-                  ||
||| j.                  |j0                        }|j	                  |j                        }|j3                  |      }| j                  r, | j4                  d   |      } | j4                  d   |      }|S  |j6                  d   |      } |j6                  d   |      }|S )NFTr6   r	  r   r   )r   r  r   r  r   rc   r   r   ra   r  r   r   r  rd   re   r   r  r  r   r  r   r   r   r   rX   r   r  rj   )rt   rr  r   r   r   r   r  r   r   r  r   r
  r  s                rw   r   z-CustomDiffusionXFormersAttnProcessor.__call__  s    $9#@MF[FaFa 	'
OQ 44^_V`a..}=@@AQAQAWAWXEIIm..tyy/?/?/E/EFGE (I$1!I(,(G(GH](^%==,,-B-E-EdF`F`FgFgFmFm-noC../D/G/GHbHbHiHiHoHo/pqE&&))//0CHHTYY--334E))12CII34E__S)F%a!Qh/#5F1bqb!83,!f*

!<<CUNa&jELLN%BBE&&u-88:$$S)446&&u-88: ??3D<M<MUYU_U_ @ 
 &((5..}=;D88;MJM;D88;MJM 	 +DKKN=9M*DKKN=9Mrx   )TFNNTr	  Nr+  )r   r,  r-  r.  r0  r   r/  r   r   rO   r   r   r2  r   r4  r5  s   @rw   r   r     s    0 !%)-1+/EE E c]	E
 &c]E E E x(E@ 9=15>> ||>  (5	>
 !.> 
>rx   r   c                        e Zd ZdZ	 	 	 	 	 	 ddededee   dee   dedef fdZ	 	 dd	e	d
e
j                  dee
j                     dee
j                     de
j                  f
dZ xZS )r   u  
    Processor for implementing attention for the Custom Diffusion method using PyTorch 2.0’s memory-efficient scaled
    dot-product attention.

    Args:
        train_kv (`bool`, defaults to `True`):
            Whether to newly train the key and value matrices corresponding to the text features.
        train_q_out (`bool`, defaults to `True`):
            Whether to newly train query matrices corresponding to the latent image features.
        hidden_size (`int`, *optional*, defaults to `None`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`, *optional*, defaults to `None`):
            The number of channels in the `encoder_hidden_states`.
        out_bias (`bool`, defaults to `True`):
            Whether to include the bias parameter in `train_q_out`.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
    r   r   r   r   r(   r   c                 <   t         |           || _        || _        || _        || _        | j                  rBt        j                  |xs ||d      | _        t        j                  |xs ||d      | _	        | j                  rt        j                  ||d      | _
        t        j                  g       | _        | j                  j                  t        j                  |||             | j                  j                  t        j                  |             y y r  r  r  s          rw   rO   z(CustomDiffusionAttnProcessor2_0.__init__D  r  rx   rr  r   r   r   r{   c                    |j                   \  }}}|j                  |||      }| j                  r| j                  |      }n|j	                  |      }|d}	|}nd}	|j
                  r|j                  |      }| j                  r| j                  |j                  | j                  j                  j                              }
| j                  |j                  | j                  j                  j                              }|
j                  |j                  j                  j                        }
|j                  |j                  j                  j                        }n"|j                  |      }
|j                  |      }|	rjt        j                   |
      }|d d d dd d f   dz  |d d d dd d f<   ||
z  d|z
  |
j#                         z  z   }
||z  d|z
  |j#                         z  z   }|j                   d   }||j$                  z  }|j'                  |d|j$                  |      j)                  dd      }|
j'                  |d|j$                  |      j)                  dd      }
|j'                  |d|j$                  |      j)                  dd      }t+        j,                  ||
||dd      }|j)                  dd      j/                  |d|j$                  |z        }|j                  |j                        }| j                  r, | j0                  d   |      } | j0                  d   |      }|S  |j2                  d   |      } |j2                  d   |      }|S )	NFTr6   r	  rG   r	   r  r   )r   r  r   r  rc   ra   r  r   r   r   r   r   r  rd   re   r   r  r  r   r  r   rq   rM   r   r  rj   )rt   rr  r   r   r   r   r  r   r   r  r   r
  r  rQ   r  s                  rw   r   z(CustomDiffusionAttnProcessor2_0.__call__^  s    *7)<)<&
OQ44^_V`a..}=EIIm,E (I$1!I(,(G(GH](^%==,,-B-E-EdF`F`FgFgFmFm-noC../D/G/GHbHbHiHiHoHo/pqE&&))//0CHHTYY--334E ))12CII34E__S)F%a!Qh/#5F1bqb!83,!f*

!<<CUNa&jELLN%BBE!''+	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP 6633RW
 &//15==j"djj[cNcd%((5;D88;MJM;D88;MJM 	 +DKKN=9M*DKKN=9Mrx   r  r+  r  r5  s   @rw   r   r   0  s    *  %)-1EE E c]	E
 &c]E E E< 9=15AA ||A  (5	A
 !.A 
Arx   r   c                       e Zd ZdZdefdZ	 	 ddedej                  de	ej                     de	ej                     d	ej                  f
d
Z
y)r   a'  
    Processor for implementing sliced attention.

    Args:
        slice_size (`int`, *optional*):
            The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and
            `attention_head_dim` must be a multiple of the `slice_size`.
    r   c                     || _         y r*  r   rt   r   s     rw   rO   zSlicedAttnProcessor.__init__  	    $rx   Nrr  r   r   r   r{   c                 T   |}|j                   }|dk(  r7|j                  \  }}}	}
|j                  |||	|
z        j                  dd      }||j                  n|j                  \  }}}|j	                  |||      }|j
                  1|j                  |j                  dd            j                  dd      }|j                  |      }|j                  d   }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  \  }}}t        j                  ||||j                  z  f|j                  |j                         }t#        |dz
  | j$                  z  dz         D ]g  }|| j$                  z  }|dz   | j$                  z  }||| }||| }|||| nd }|j'                  |||      }t        j(                  ||||       }|||| i |j+                  |      } |j,                  d   |      } |j,                  d   |      }|dk(  r$|j                  dd      j/                  |	
      }|j0                  r||z   }||j2                  z  }|S )Nr  r6   r	   rG   r   r   r   )r   r   r  r   r  rJ   rc   r   ra   r  rd   re   r   r  r   r   r   r  r   r  r  r   rj   r   r-   r,   )rt   rr  r   r   r   r  r  r   r  r  r  r  r   r   rI   r   r
  batch_size_attentionr-  i	start_idxend_idxquery_slice	key_sliceattn_mask_slice
attn_slices                             rw   r   zSlicedAttnProcessor.__call__  s    !"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ 44^_V`a??& OOM,C,CAq,IJTTUVXYZM		-(kk"o&&u- ($1!__$($C$CDY$Z!ii-.		/0$$S)&&u-05-lA!<

1BCELL`e`k`k
 ,q0T__DqHI 	:ADOO+I1u/G	'2KIg.ICQC]nYw?cgO22;	?[J:uYw/GHJ/9M)G,	: ..}= 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   r+  )r   r,  r-  r.  r/  rO   r   r   r2  r   r   r  rx   rw   r   r     sp    %3 % 9=15GG ||G  (5	G
 !.G 
Grx   r   c                       e Zd ZdZd Z	 	 	 ddddej                  deej                     deej                     d	eej                     d
ej                  fdZy)r   ah  
    Processor for implementing sliced attention with extra learnable key and value matrices for the text encoder.

    Args:
        slice_size (`int`, *optional*):
            The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and
            `attention_head_dim` must be a multiple of the `slice_size`.
    c                     || _         y r*  r  r  s     rw   rO   z#SlicedAttnAddedKVProcessor.__init__  r  rx   Nrr  r   r   r   r   r  r{   c                 "   |}|j                   |j                  ||      }|j                  |j                  d   |j                  d   d      j                  dd      }|j                  \  }}}	|j	                  |||      }||}n|j
                  r|j                  |      }|j                  |j                  dd            j                  dd      }|j                  |      }
|
j                  d   }|j                  |
      }
|j                  |      }|j                  |      }|j                  |      }|j                  |      }|j                  sw|j                  |      }|j                  |      }|j                  |      }|j                  |      }t        j                   ||gd      }t        j                   ||gd      }n|}|}|
j                  \  }}}	t        j"                  ||||j$                  z  f|
j&                  |
j(                        }t+        |dz
  | j,                  z  dz         D ]g  }|| j,                  z  }|dz   | j,                  z  }|
|| }||| }|||| nd }|j/                  |||      }t        j0                  ||||       }|||| i |j3                  |      } |j4                  d   |      } |j4                  d   |      }|j                  dd      j7                  |j                        }||z   }|S )Nr   r6   rG   r	   r   r   r   )r]   r  r   r   r  ra   r  rJ   rc   r   rf   rg   r*   rd   re   r   r  r  r   r   r   r  r   r  r  r   rj   r   )rt   rr  r   r   r   r  r  r   r  r   r   rI   r  r  r   r
  r  r-  r  r  r  r  r  r  r  s                            rw   r   z#SlicedAttnAddedKVProcessor.__call__  s    !( --mTBM%**=+>+>q+A=CVCVWXCY[]^hhijlmn)6)<)<&
OQ44^_V`a ($1!__$($C$CDY$Z!(?(?1(EFPPQRTUV		-(kk"o&&u-)-9N)O&+/??;P+Q()-)?)?@^)_&+/+A+ABb+c((())M*CIIm,E((-C**51E));SAqICII?GQOE0C4E05-lA!<

1BCELL`e`k`k
 ,q0T__DqHI 	:ADOO+I1u/G	'2KIg.ICQC]nYw?cgO22;	?[J:uYw/GHJ/9M)G,	: ..}= 'A}5&A}5%//B7??O%0rx   r  r  r  rx   rw   r   r     s    % 9=15'+LL ||L  (5	L
 !.L u||$L 
Lrx   r   c                   |     e Zd ZdZdedef fdZdej                  dej                  dej                  fdZ xZ	S )	r\   aq  
    Spatially conditioned normalization as defined in https://huggingface.co/papers/2209.09002.

    Args:
        f_channels (`int`):
            The number of channels for input to group normalization layer, and output of the spatial norm layer.
        zq_channels (`int`):
            The number of channels for the quantized vector as described in the paper.
    r?   r@   c                     t         |           t        j                  |ddd      | _        t        j
                  ||ddd      | _        t        j
                  ||ddd      | _        y )Nr)  ư>Tr;   r6   r   )r:  strider  )rN   rO   r   r[   
norm_layerr=  conv_yconv_b)rt   r?   r@   rv   s      rw   rO   zSpatialNorm.__init__`  sZ    
 	,,J2SW`deiiZQqZ[\iiZQqZ[\rx   fzqr{   c                     |j                   dd  }t        j                  ||d      }| j                  |      }|| j	                  |      z  | j                  |      z   }|S )Nr   nearest)r  rV  )r   rq   interpolater  r  r  )rt   r  r  f_sizenorm_fnew_fs         rw   r   zSpatialNorm.forwardj  sU    ]]2F;#R(4;;r?:rx   )
r   r,  r-  r.  r/  rO   r   r2  r   r4  r5  s   @rw   r\   r\   U  sH    ]] ] 5<< ELL rx   r\   c                        e Zd ZdZd fd	Z	 	 	 	 	 ddedej                  deej                     deej                     deej                     de	d	eej                     fd
Z
 xZS )r   a  
    Attention processor for Multiple IP-Adapters.

    Args:
        hidden_size (`int`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`):
            The number of channels in the `encoder_hidden_states`.
        num_tokens (`int`, `Tuple[int]` or `List[int]`, defaults to `(4,)`):
            The context length of the image features.
        scale (`float` or List[`float`], defaults to 1.0):
            the weight scale of image prompt.
    c           
      b   t         |           || _        || _        t	        |t
        t        f      s|g}|| _        t	        |t              s|gt        |      z  }t        |      t        |      k7  rt        d      || _
        t        j                  t        t        |            D cg c]  }t        j                  ||d       c}      | _        t        j                  t        t        |            D cg c]  }t        j                  ||d       c}      | _        y c c}w c c}w NJ`scale` should be a list of integers with the same length as `num_tokens`.FrL   )rN   rO   r   r   r   tuplelistr   r   rZ   rX   r   ri   r  rb   r   to_v_iprt   r   r   r   rX   r   rv   s         rw   rO   zIPAdapterAttnProcessor.__init__  s    &#6 *udm4$J$%&Gc*o-Eu:Z(ijj
}}NSTWXbTcNdeRYY*KeDe
 }}NSTWXbTcNdeRYY*KeDe
 f fs   +D';D,rr  r   r   r   r  rX   r   c           
         |}|ct        |t              r|\  }}	nMd}
t        dd|
d       |j                  d   | j                  d   z
  }|d d d |d d f   |d d |d d d f   g}	}|j
                  |j                  ||      }|j                  }|dk(  r7|j                  \  }}}}|j                  ||||z        j                  dd	      }||j                  n|j                  \  }}}|j                  |||      }|j                  1|j                  |j                  dd	            j                  dd	      }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j                  |      }|j!                  |      }|j!                  |      }|j!                  |      }|j#                  |||      }t%        j&                  ||      }|j)                  |      }|t        |t*              st-        |j/                  d            }t1        |      t1        | j2                        cxk(  rt1        	      k(  s<n t5        d
t1        |       dt1        | j2                         dt1        	       d      t7        t9        || j2                  |	            D ]  \  }\  }}}|t        |t$        j:                        r|j                  dk7  rt5        d      |j                  d   |j                  d   k7  r.t5        d|j                  d    d|j                  d    d|       t        |t,              st1        |      |j                  d   k(  rt5        d|j                  d    dt1        |       d|        nd gt1        | j2                        z  }t9        	| j2                  | j<                  | j>                  |      D ]  \  }}}}}d}t        |t,              rtA        d |D              r
d}n|dk(  rd}|r;|>t        |t,              s|g|j                  d   z  }|j                  d   }tC        |      D ]  } ||d d |d d d d f         }  ||d d |d d d d f         }!|j!                  |       } |j!                  |!      }!|j#                  || d       }"t%        j&                  |"|!      }#|j)                  |#      }#tE        jF                  |d d |d d d d f   ||#j                  d   |#j                  d	         }$|$jI                  |jJ                  |jL                        }$|||   |#|$z  z  z   } | ||      }  ||      }!|j!                  |       } |j!                  |!      }!|j#                  || d       }"t%        j&                  |"|!      }|j)                  |      }|||z  z   }  |jN                  d   |      } |jN                  d   |      }|dk(  r$|j                  dd      jQ                  |      }|jR                  r||z   }||jT                  z  }|S )NYou have passed a tensor as `encoder_hidden_states`. This is deprecated and will be removed in a future release. Please make sure to update your script to pass `encoder_hidden_states` as a tuple to suppress this warning.!encoder_hidden_states not a tupler  Fstandard_warnr6   r   r  r	   "Length of ip_adapter_masks array ()) must match length of self.scale array (") and number of ip_hidden_states ()Each element of the ip_adapter_masks array should be a tensor with shape [1, num_images_for_ip_adapter, height, width]. Please use `IPAdapterMaskProcessor` to preprocess your maskNumber of masks (&) does not match number of ip images () at index #) does not match number of scales (c              3   &   K   | ]	  }|d k(    ywr   Nr  rN  ss     rw   rO  z2IPAdapterAttnProcessor.__call__.<locals>.<genexpr>       -!qAv-   Tr   rG   r   )+r   r  r   r   r   r]   r   r  r   r  rJ   rc   ra   r  rd   re   r   r  r   r  r   r   r  r  r   rX   rZ   	enumeratezipr2  r   r  rQ  r  r
   
downsampler   r   r   rj   r   r-   r,   )%rt   rr  r   r   r   r  rX   r   r  r   r  end_posr  r   r  r  r  r  r   r   r   r
  r  indexr  ip_statecurrent_ip_hidden_statesr   r  skipcurrent_num_imagesr  ip_keyip_valueip_attention_probs_current_ip_hidden_statesmask_downsamples%                                        rw   r   zIPAdapterAttnProcessor.__call__  s    ! !,/7:O7%'7C $ =wH[kpq/55a84??1;MM)!XgXq.9*1gh>:; (8%
 ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ 44^_V`a??& OOM,C,CAq,IJTTUVXYZM		-( ($1!__$($C$CDY$Z!ii-.		/0&&u-$$S)&&u-33E3O		/59..}='.5#'(8(B(B1(E#F ()S_UDT@UU 8=M9N8O P336tzz?2C D,-.a1  7@DTVZV`V`br@s6t 2E2D%| %dELL9TYY!^([ 
 zz!}q(99(/

1 ?44<NN14E3FkRWQXZ  "%.s5zTZZPQ]7R(/

1 ?114UKwP ( !%vDJJ7 HKdjj$,,FVH
 /	UC$eWgt D%&-u--D!#%eT2!&$**Q- 7)-A&"#56 q!()A!Q1*)M!N#*+CAq!QJ+O#P!%!7!7!?#'#9#9(#C-1-F-FufVZ-[*49II>PRZ4[1484J4JKd4e1*@*K*K Aq!,&5;;A>5;;A>	+ +:*<*<5;;W\WcWc*<*d(5aD]`oDo8p(p)q, %%=>F&'?@H!33F;F#55h?H)-)B)B5&RV)W&/4yy9KX/V,/3/E/EF^/_,$1E<T4T$TM_/	Ud 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   Nr  r:   NNNr:   Nr   r,  r-  r.  rO   r   r   r2  r   r   r   r4  r5  s   @rw   r   r   r  s    
6 9=15'+37__ ||_  (5	_
 !._ u||$_ _ #5<<0_rx   r   c                        e Zd ZdZd fd	Z	 	 	 	 	 ddedej                  deej                     deej                     deej                     de	d	eej                     fd
Z
 xZS )r   a  
    Attention processor for IP-Adapter for PyTorch 2.0.

    Args:
        hidden_size (`int`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`):
            The number of channels in the `encoder_hidden_states`.
        num_tokens (`int`, `Tuple[int]` or `List[int]`, defaults to `(4,)`):
            The context length of the image features.
        scale (`float` or `List[float]`, defaults to 1.0):
            the weight scale of image prompt.
    c           
         t         |           t        t        d      s"t	        | j
                  j                   d      || _        || _        t        |t        t        f      s|g}|| _        t        |t              s|gt        |      z  }t        |      t        |      k7  rt        d      || _        t!        j"                  t%        t        |            D cg c]  }t!        j&                  ||d       c}      | _        t!        j"                  t%        t        |            D cg c]  }t!        j&                  ||d       c}      | _        y c c}w c c}w )NrM   z@ requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r  FrL   )rN   rO   rp   rq   ro  rv   r   r   r   r   r  r  r   r   rZ   rX   r   ri   r  rb   r   r  r  s         rw   rO   z"IPAdapterAttnProcessor2_0.__init__I  s%   q89>>**++kl  '#6 *udm4$J$%&Gc*o-Eu:Z(ijj
}}NSTWXbTcNdeRYY*KeDe
 }}NSTWXbTcNdeRYY*KeDe
 f fs   E-Err  r   r   r   r  rX   r   c           
         |}|ct        |t              r|\  }}	nMd}
t        dd|
d       |j                  d   | j                  d   z
  }|d d d |d d f   |d d |d d d f   g}	}|j
                  |j                  ||      }|j                  }|dk(  r7|j                  \  }}}}|j                  ||||z        j                  dd	      }||j                  n|j                  \  }}}|>|j                  |||      }|j                  ||j                  d
|j                  d
         }|j                  1|j                  |j                  dd	            j                  dd	      }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j!                  |      }|j                  d
   }||j                  z  }|j                  |d
|j                  |      j                  dd	      }|j                  |d
|j                  |      j                  dd	      }|j                  |d
|j                  |      j                  dd	      }t#        j$                  ||||dd      }|j                  dd	      j'                  |d
|j                  |z        }|j)                  |j*                        }|t        |t,              st/        |j1                  d            }t3        |      t3        | j4                        cxk(  rt3        	      k(  s<n t7        dt3        |       dt3        | j4                         dt3        	       d      t9        t;        || j4                  |	            D ]  \  }\  }}}|t        |t<        j>                        r|j                  dk7  rt7        d      |j                  d   |j                  d   k7  r.t7        d|j                  d    d|j                  d    d|       t        |t.              st3        |      |j                  d   k(  rt7        d|j                  d    dt3        |       d|        nd gt3        | j4                        z  }t;        	| j4                  | j@                  | jB                  |      D ]  \  }}}}}d}t        |t.              rtE        d |D              r
d}n|dk(  rd}|r;|t        |t.              s|g|j                  d   z  }|j                  d   }tG        |      D ]c  }  ||d d | d d d d f         }! ||d d | d d d d f         }"|!j                  |d
|j                  |      j                  dd	      }!|"j                  |d
|j                  |      j                  dd	      }"t#        j$                  ||!|"d dd      }#|#j                  dd	      j'                  |d
|j                  |z        }#|#j)                  |j*                        }#tI        jJ                  |d d | d d d d f   ||#j                  d   |#j                  d	         }$|$j)                  |j*                  |jL                        }$|||    |#|$z  z  z   }f  ||      }! ||      }"|!j                  |d
|j                  |      j                  dd	      }!|"j                  |d
|j                  |      j                  dd	      }"t#        j$                  ||!|"d dd      }|j                  dd	      j'                  |d
|j                  |z        }|j)                  |j*                        }|||z  z   }  |jN                  d   |      } |jN                  d   |      }|dk(  r$|j                  d
d      j'                  |      }|jP                  r||z   }||jR                  z  }|S )Nr  r  r  Fr  r6   r   r  r	   rG   r	  r  r  r  r  r  r  r  r  r  r  c              3   &   K   | ]	  }|d k(    ywr  r  r  s     rw   rO  z5IPAdapterAttnProcessor2_0.__call__.<locals>.<genexpr>  r  r  Tr   r   )*r   r  r   r   r   r]   r   r  r   r  r   rJ   rc   ra   r  rd   re   rq   rM   r   r   r   r   r  r  r   rX   rZ   r  r  r   r2  r   r  rQ  r  r
   r  r   rj   r-   r,   )%rt   rr  r   r   r   r  rX   r   r  r   r  r  r  r   r  r  r  r  r   r   r   r
  rQ   r  r  r  r  r  r   r  r  r  r  r  r  r  r  s%                                        rw   r   z"IPAdapterAttnProcessor2_0.__call__e  s    ! !,/7:O7%'7C $ =wH[kpq/55a84??1;MM)!XgXq.9*1gh>:; (8%
 ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ %!88ZdeN ,00TZZ^MaMabdMefN??& OOM,C,CAq,IJTTUVXYZM		-( ($1!__$($C$CDY$Z!ii-.		/0IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP 6633RW
 &//15==j"djj[cNcd%((5'.5#'(8(B(B1(E#F ()S_UDT@UU 8=M9N8O P336tzz?2C D,-.a1  7@DTVZV`V`br@s6t 2E2D%| %dELL9TYY!^([ 
 zz!}q(99(/

1 ?44<NN14E3FkRWQXZ  "%.s5zTZZPQ]7R(/

1 ?114UKwP ( !%vDJJ7 HKdjj$,,FVH
 <	UC$eWgt D%&-u--D!#%eT2!&$**Q- 7)-A&"#56 q!()A!Q1*)M!N#*+CAq!QJ+O#P!'ZTZZ!R!\!\]^`a!b#+==RX#V#`#`abde#f 564R4R!68ts^c51 5N4W4WXY[\4]4e4e&DJJ,A51 5N4P4PQVQ\Q\4]1*@*K*K Aq!,&5;;A>5;;A>	+ +:*<*<5;;W\WcWc*<*d(5aD]`oDo8p(p5q8 %%=>F&'?@H#[[RXNXXYZ\]^F'}}ZTZZR\\]^`abH 01/M/Mvx43Z_0, 0H/Q/QRSUV/W/_/_"B

X(=0, 0H/J/J5;;/W,$1E<T4T$TMy<	U~ 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   r  r  r  r5  s   @rw   r   r   :  s    
@ 9=15'+37zz ||z  (5	z
 !.z u||$z z #5<<0zrx   r   c                        e Zd ZdZ	 	 	 	 ddee   f fdZ	 	 	 	 	 ddedej                  deej                     deej                     deej                     d	e
d
eej                     fdZ xZS )r   aG  
    Attention processor for IP-Adapter using xFormers.

    Args:
        hidden_size (`int`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`):
            The number of channels in the `encoder_hidden_states`.
        num_tokens (`int`, `Tuple[int]` or `List[int]`, defaults to `(4,)`):
            The context length of the image features.
        scale (`float` or `List[float]`, defaults to 1.0):
            the weight scale of image prompt.
        attention_op (`Callable`, *optional*, defaults to `None`):
            The base
            [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to
            use as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best
            operator.
    r   c           
         t         |           || _        || _        || _        t        |t        t        f      s|g}|| _        t        |t              s|gt        |      z  }t        |      t        |      k7  rt        d      || _        t        j                  t        t        |            D cg c]  }t        j                  |xs ||d        c}      | _        t        j                  t        t        |            D cg c]  }t        j                  |xs ||d        c}      | _        y c c}w c c}w r  )rN   rO   r   r   r   r   r  r  r   r   rZ   rX   r   ri   r  rb   r   r  )rt   r   r   r   rX   r   r   rv   s          rw   rO   z'IPAdapterXFormersAttnProcessor.__init__6  s    	&#6 (*udm4$J$%&Gc*o-Eu:Z(ijj
}}]bcfgqcr]stXYRYY*9k;USt
 }}]bcfgqcr]stXYRYY*9k;USt
 u us   2#D6#D;rr  r   r   r   r  rX   r   c           
         |}|ct        |t              r|\  }}	nMd}
t        dd|
d       |j                  d   | j                  d   z
  }|d d d |d d f   |d d |d d d f   g}	}|j
                  |j                  ||      }|j                  }|dk(  r7|j                  \  }}}}|j                  ||||z        j                  dd	      }||j                  n|j                  \  }}}|6|j                  |||      }|j                  \  }}}|j                  d
|d
      }|j                  1|j                  |j                  dd	            j                  dd	      }|j                  |      }||}n|j                  r|j                  |      }|j                  |      }|j!                  |      }|j#                  |      j%                         }|j#                  |      j%                         }|j#                  |      j%                         }t&        j(                  j+                  ||||| j,                        }|j/                  |j0                        }|j3                  |      }	rp|t        |t4              st7        |j9                  d            }t;        |      t;        | j<                        cxk(  rt;        |	      k(  s<n t?        dt;        |       dt;        | j<                         dt;        |	       d      tA        tC        || j<                  |	            D ]  \  }\  }}}|t        |tD        jF                        r|j                  dk7  rt?        d      |j                  d   |j                  d   k7  r.t?        d|j                  d    d|j                  d    d|       t        |t6              st;        |      |j                  d   k(  rt?        d|j                  d    dt;        |       d|        nd gt;        | j<                        z  }tC        |	| j<                  | jH                  | jJ                  |      D ]  \  }}}}}d}t        |t6              rtM        d |D              r
d}n|dk(  rd}|r;||j/                  tD        jN                        }t        |t6              s|g|j                  d   z  }|j                  d   }tQ        |      D ]8  } ||d d |d d d d f         }  ||d d |d d d d f         }!|j#                  |       j%                         } |j#                  |!      j%                         }!t&        j(                  j+                  || |!| j,                        }"|"j/                  |j0                        }"|j3                  |"      }"tS        jT                  |d d |d d d d f   ||"j                  d   |"j                  d	         }#|#j/                  |j0                  |jV                        }#|||   |"|#z  z  z   };  ||      }  ||      }!|j#                  |       j%                         } |j#                  |!      j%                         }!t&        j(                  j+                  || |!| j,                        }|j/                  |j0                        }|j3                  |      }|||z  z   }  |jX                  d   |      } |jX                  d   |      }|dk(  r$|j                  d
d      j[                  |      }|j\                  r||z   }||j^                  z  }|S )Nr  r  r  Fr  r6   r   r  r	   rG   )r  r  r  r  r  r  r  r  r  r  r  c              3   &   K   | ]	  }|d k(    ywr  r  r  s     rw   rO  z:IPAdapterXFormersAttnProcessor.__call__.<locals>.<genexpr>  s     1a161r  T)r  r   r   )0r   r  r   r   r   r]   r   r  r   r  r+  rJ   rc   ra   r  rd   re   r   r  r   r   r   r   r   r   r   r   r  r  r   rX   rZ   r  r  r   r2  r   r  rQ  r<  r  r
   r  r   rj   r   r-   r,   )$rt   rr  r   r   r   r  rX   r   r  r   r  r  r  r   r  r  r  r  r   r-  r   r   r
  r  r  r  r  r   r  r  r  r  r  r  r  r  s$                                       rw   r   z'IPAdapterXFormersAttnProcessor.__call__U  s    ! !,/7:O7%'7C $ =wH[kpq/55a84??1;MM)!XgXq.9*1gh>:; (8%
 ( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM $9#@MF[FaFa 	'
OQ %!88ZdeN "/!4!4A|Q+222|RHN??& OOM,C,CAq,IJTTUVXYZM		-( ($1!__$($C$CDY$Z!ii-.		/0&&u-88:$$S)446&&u-88: ??3D<M<M @ 
 &((5..}=+!"2D9'+,<,F,Fq,I'J$,-TZZYCHXDYY$<SAQ=R<S T77:4::6G H 012!5  ;D,djj:JK; 66eX  <$)$=a",!_# 
  ::a=HNN1,==","3DJJqM? C88@q8I7J+V[U\!^#  &eT23u:TU;V","3DJJqM? C558ZLE7!T# #, %)6C

O#;  LO $**dllDLLJZL 3YG(%'4 eT*1511#aZD'#wwu}}5)%6%*Gdjjm$;E-1ZZ]*!&'9!: uA%,-EaAqj-Q%RF'./G1aQR
/S'TH%)%;%;F%C%N%N%PF'+'='=h'G'R'R'TH8@8_8_ %vxD<M<M 9` 95 9R8T8TUZU`U`8a58<8N8NOh8i5.D.O.O $Q1aZ 0 * 9 ? ? B 9 ? ? B	/O />.@.@u{{[`[g[g.@.hO,9E!HHadsHs<t,tM+u. "))A!B#*+C#D!%!7!7!?!J!J!L#'#9#9(#C#N#N#P3;<<3Z3Z!688I8I 4[ 40 4L3N3Nu{{3[0373I3IJb3c0(5@X8X(Xg3Yl 'A}5&A}5?)33B;CCJPWY_afgM##)H4M%(B(BBrx   )Nr  r:   Nr  )r   r,  r-  r.  r   r   rO   r   r   r  r   r   r4  r5  s   @rw   r   r   "  s    , !+/
 x(
F >B6:,08<rr ((r  ((9(9:	r
 !!2!23r u(()r r #5#4#45rrx   r   c                        e Zd ZdZ	 	 ddededededef
 fdZ	 	 	 	 dded	ej                  d
ej                  de
ej                     dej                  dej                  dej                  fdZ xZS )!SD3IPAdapterJointAttnProcessor2_0aZ  
    Attention processor for IP-Adapter used typically in processing the SD3-like self-attention projections, with
    additional image-based information and timestep embeddings.

    Args:
        hidden_size (`int`):
            The number of hidden channels.
        ip_hidden_states_dim (`int`):
            The image feature dimension.
        head_dim (`int`):
            The number of head channels.
        timesteps_emb_dim (`int`, defaults to 1280):
            The number of input channels for timestep embedding.
        scale (`float`, defaults to 0.5):
            IP-Adapter scale.
    r   ip_hidden_states_dimr  timesteps_emb_dimrX   c                 0   t         |           ddlm}m}  |||dz  dd      | _        t        j                  ||d      | _        t        j                  ||d      | _	         ||d      | _
         ||d      | _         ||d      | _        || _        y )Nr6   )AdaLayerNormr9   r	   r  )
output_dimnorm_eps	chunk_dimFrL   )rN   rO   rP   r  r9   norm_ipr   rb   r   r  r^   r_   	norm_ip_krX   )	rt   r   r  r  r  rX   r  r9   rv   s	           rw   rO   z*SD3IPAdapterJointAttnProcessor2_0.__init__  s     	 	9#$5BVYZBZeiuvwyy!5{Oyy!5{Oh-h- 40
rx   rr  r   r   r   r   r  r{   c                 	   |}|j                   d   }|j                  |      }	|j                  |      }
|j                  |      }|
j                   d   }||j                  z  }|	j                  |d|j                  |      j                  dd      }	|
j                  |d|j                  |      j                  dd      }
|j                  |d|j                  |      j                  dd      }|	}|
}|}|j                  |j                  |	      }	|j                  |j                  |
      }
|B|j                  |      }|j                  |      }|j                  |      }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }|j                  |j                  |      }|j                  |j                  |      }t        j                  |	|gd      }	t        j                  |
|gd      }
t        j                  ||gd      }t!        j"                  |	|
|dd	      }|j                  dd      j%                  |d|j                  |z        }|j'                  |	j(                        }|M|ddd|j                   d   f   |dd|j                   d   df   }}|j*                  s|j-                  |      }| j.                  dk7  rp|m| j1                  ||
      }| j3                  |      }| j5                  |      }|j                  |d|j                  |      j                  dd      }|j                  |d|j                  |      j                  dd      }| j                  |      }	| j                  |      }| j7                  |      }t        j                  ||gd      }
t        j                  ||gd      }t!        j"                  |	|
|dd	      }|j                  dd      j                  |d|j                  |z        }|j'                  |	j(                        }||| j.                  z  z   } |j8                  d   |      } |j8                  d   |      }|||fS |S )aP  
        Perform the attention computation, integrating image features (if provided) and timestep embeddings.

        If `ip_hidden_states` is `None`, this is equivalent to using JointAttnProcessor2_0.

        Args:
            attn (`Attention`):
                Attention instance.
            hidden_states (`torch.FloatTensor`):
                Input `hidden_states`.
            encoder_hidden_states (`torch.FloatTensor`, *optional*):
                The encoder hidden states.
            attention_mask (`torch.FloatTensor`, *optional*):
                Attention mask.
            ip_hidden_states (`torch.FloatTensor`, *optional*):
                Image embeddings.
            temb (`torch.FloatTensor`, *optional*):
                Timestep embeddings.

        Returns:
            `torch.FloatTensor`: Output hidden states.
        r   rG   r6   r	   Nr   r	  Fr  )r  )r   rc   rd   re   r   r  r   r^   r_   rh   rf   rg   rn   ro   r   r  rq   rM   r   r   r   rV   rm   rX   r  r   r  r  rj   )rt   rr  r   r   r   r   r  r  r   r   r   r
  rQ   r  	img_queryimg_key	img_valuer  r  r  norm_ip_hidden_statesr  r  s                          rw   r   z*SD3IPAdapterJointAttnProcessor2_0.__call__1  s   > !"((+
 		-(ii&		-(IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP		;;"KK&E;;"++c"C !,/3?T/U,-1__=R-S*/3?T/U,/O/T/TB

H0i1o - .L-P-PB

H.i1o + 0P/T/TB

H0i1o -   ,373D3DEe3f0  ,151B1BCa1b.IIu&FGQOE))S"@AqICIIu&FGQOE66uc5TWchi%//15==j"djj[cNcd%((5 , a!48>>!#4!445a!2!445 1M (((,8M(N% ::?/;$(LL1AL$M! \\"78F||$9:H [[RXFPPQRTUVF}}ZTZZJTTUVXYZH KK	*Ekk'*G^^F+F ))Wf-15CIIy(3;E ==eS%[^jop/99!Q?DDZQSUYU_U_bjUjk/225;;?),<tzz,IIM 'A}5&A}5 , "777  rx   )i   g      ?r
  )r   r,  r-  r.  r/  r   rO   r   r   r  r   r   r4  r5  s   @rw   r  r  
  s    , "& " 	
  2 486:.2"&A!A! ((A!  %00	A!
 !!2!23A!  ++A! A! 
		A!rx   r  c                       e Zd ZdZd Z	 	 	 ddedej                  deej                     deej                     deej                     d	ej                  fd
Z
y)PAGIdentitySelfAttnProcessor2_0
    Processor for implementing PAG using scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
    PAG reference: https://huggingface.co/papers/2403.17377
    c                 :    t        t        d      st        d      y )NrM   z_PAGIdentitySelfAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z(PAGIdentitySelfAttnProcessor2_0.__init__  s"    q89q  :rx   Nrr  r   r   r   r  r{   c                 F   |}|j                   |j                  ||      }|j                  }|dk(  r7|j                  \  }}	}
}|j                  ||	|
|z        j	                  dd      }|j                  d      \  }}|j                  \  }}}|>|j                  |||      }|j                  ||j                  d|j                  d         }|j                  1|j                  |j	                  dd            j	                  dd      }|j                  |      }|j                  |      }|j                  |      }|j                  d   }||j                  z  }|j                  |d|j                  |      j	                  dd      }|j                  |d|j                  |      j	                  dd      }|j                  |d|j                  |      j	                  dd      }t        j                  ||||dd      }|j	                  dd      j                  |d|j                  |z        }|j                  |j                         } |j"                  d   |      } |j"                  d   |      }|dk(  r$|j	                  dd	      j                  |	
      }|j                  \  }}}|j                  1|j                  |j	                  dd            j	                  dd      }|j                  |      }|j                  |j                         } |j"                  d   |      } |j"                  d   |      }|dk(  r$|j	                  dd	      j                  |	
      }t%        j&                  ||g      }|j(                  r||z   }||j*                  z  }|S )
Nr  r6   r	   rG   r	  Fr  r   r   )r]   r   r   r  r   r  r  r   rJ   rc   rd   re   rq   rM   r   r   r   rj   r   r  r-   r,   )rt   rr  r   r   r   r  r  r  r   r  r  r  r  r  r  r   r   r   r
  rQ   r  s                        rw   r   z(PAGIdentitySelfAttnProcessor2_0.__call__  s    !( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM 0=/B/B1/E,, *;)@)@&
OQ%!88ZdeN ,00TZZ^MaMabdMefN??& $0A0K0KAq0Q R \ \]^`a b		+,ii)*		+,IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP ::33RW
 .771=EEjRTVZV`V`ckVkl-00= +DKKN+<=*DKKN+<=? 1 ; ;B C K KJX_agin o *;)@)@&
OQ??& $0A0K0KAq0Q R \ \]^`a b II&78-00= +DKKN+<=*DKKN+<=? 1 ; ;B C K KJX_agin o 		#46G"HI##)H4M%(B(BBrx   r  r   r,  r-  r.  rO   r   r   r  r   r2  r   r  rx   rw   r  r    s    
 >B6:,0TT ((T  ((9(9:	T
 !!2!23T u(()T 
Trx   r  c                       e Zd ZdZd Z	 	 	 ddedej                  deej                     deej                     deej                     d	ej                  fd
Z
y)"PAGCFGIdentitySelfAttnProcessor2_0r   c                 :    t        t        d      st        d      y )NrM   zbPAGCFGIdentitySelfAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rn  rp  s    rw   rO   z+PAGCFGIdentitySelfAttnProcessor2_0.__init__  s"    q89t  :rx   Nrr  r   r   r   r  r{   c                 z   |}|j                   |j                  ||      }|j                  }|dk(  r7|j                  \  }}	}
}|j                  ||	|
|z        j	                  dd      }|j                  d      \  }}}t        j                  ||g      }|j                  \  }}}|>|j                  |||      }|j                  ||j                  d|j                  d         }|j                  1|j                  |j	                  dd            j	                  dd      }|j                  |      }|j                  |      }|j                  |      }|j                  d   }||j                  z  }|j                  |d|j                  |      j	                  dd      }|j                  |d|j                  |      j	                  dd      }|j                  |d|j                  |      j	                  dd      }t        j                  ||||dd      }|j	                  dd      j!                  |d|j                  |z        }|j#                  |j$                        } |j&                  d	   |      } |j&                  d   |      }|dk(  r$|j	                  dd
      j!                  |	
      }|j                  \  }}}|j                  1|j                  |j	                  dd            j	                  dd      }|j                  |      }|}|j#                  |j$                        } |j&                  d	   |      } |j&                  d   |      }|dk(  r$|j	                  dd
      j!                  |	
      }t        j                  ||g      }|j(                  r||z   }||j*                  z  }|S )Nr  r6   r	   r   rG   r	  Fr  r   r   )r]   r   r   r  r   r  r   r  r  r   rJ   rc   rd   re   rq   rM   r   r   r   rj   r-   r,   )rt   rr  r   r   r   r  r  r  r   r  r  r  r  r  r  r  r   r   r   r
  rQ   r  s                         rw   r   z+PAGCFGIdentitySelfAttnProcessor2_0.__call__$  s    !( --mTBM"''
?1>1D1D.J)..z7FUNS]]^_abcM FSEXEXYZE[B/1B!II';=N&OP *;)@)@&
OQ%!88ZdeN ,00TZZ^MaMabdMefN??& $0A0K0KAq0Q R \ \]^`a b		+,ii)*		+,IIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP ::33RW
 .771=EEjRTVZV`V`ckVkl-00= +DKKN+<=*DKKN+<=? 1 ; ;B C K KJX_agin o *;)@)@&
OQ??& $0A0K0KAq0Q R \ \]^`a b		+,!-00= +DKKN+<=*DKKN+<=? 1 ; ;B C K KJX_agin o 		#46G"HI##)H4M%(B(BBrx   r  r  r  rx   rw   r  r    s    
 >B6:,0XX ((X  ((9(9:	X
 !!2!23X u(()X 
Xrx   r  c                   L    e Zd ZdZdedej                  dej                  fdZy)rQ  zD
    Processor for implementing multiscale quadratic attention.
    rr  r   r{   c                    |j                   dd  \  }}||z  |j                  kD  rd}nd}|}t        |j                               \  }}}}|j                  }	|j                  dd      }|j                  |      }
|j                  |      }|j                  |      }t        j                  |
||gd      }|j                  dd      }|g}|j                  D ]  }|j                   ||              t        j                  |d      }|r |j                  t        j                        }|j                  |dd|j                  z  ||z        }|j!                  dd	      \  }
}}|j#                  |
      }
|j#                  |      }|r&|j%                  |
||      }|j                  |	      }n|j'                  |
||      }t        j                  ||d||f      }|j)                  |j                  dd            j                  dd      }|j*                  d
k(  r2|j-                  |j                  dd            j                  dd      }n|j-                  |      }|j.                  r||z   }|S )Nr   TFr6   rG   r   r   rW  r	   rD   )r   rF  r  r  r   movedimrc   rd   re   r   r  rM  rk   r   rY  r   r  rO  r\  r`  rj   rH  rP  r-   )rt   rr  r   r  r  use_linear_attentionr  r   r   original_dtyper   r   r
  multi_scale_qkvblocks                  rw   r   z'SanaMultiscaleAttnProcessor2_0.__call__  sb   %++BC0E>D333#' #(  '+M,>,>,@'A$
Avu&,,%--a4		-(ii&		-(		5#u"51=%--b!4(/++ 	9E""5#78	9 		/q9),,5==,AM%--j"a$BYBY>Y[adi[ij)//q/9sE!!%($ 77sEJM),,>,BM ::5#uMMmj"fe5TUM$9$9!R$@AII"aP>>Z' MM-*?*?2*FGOOPRTUVM MM-8M##)H4Mrx   N)r   r,  r-  r.  rD  r   r2  r   r  rx   rw   rQ  rQ    s-    4: 45<< 4\a\h\h 4rx   rQ  c                       e Zd ZdZd Zy)LoRAAttnProcessorz9
    Processor for implementing attention with LoRA.
    c                      y r*  r  rp  s    rw   rO   zLoRAAttnProcessor.__init__      rx   Nr   r,  r-  r.  rO   r  rx   rw   r  r        rx   r  c                       e Zd ZdZd Zy)LoRAAttnProcessor2_0zj
    Processor for implementing attention with LoRA (enabled by default if you're using PyTorch 2.0).
    c                      y r*  r  rp  s    rw   rO   zLoRAAttnProcessor2_0.__init__  r  rx   Nr  r  rx   rw   r  r    r  rx   r  c                       e Zd ZdZd Zy)LoRAXFormersAttnProcessorzH
    Processor for implementing attention with LoRA using xFormers.
    c                      y r*  r  rp  s    rw   rO   z"LoRAXFormersAttnProcessor.__init__  r  rx   Nr  r  rx   rw   r  r    r  rx   r  c                       e Zd ZdZd Zy)LoRAAttnAddedKVProcessorzz
    Processor for implementing attention with LoRA with extra learnable key and value matrices for the text encoder.
    c                      y r*  r  rp  s    rw   rO   z!LoRAAttnAddedKVProcessor.__init__  r  rx   Nr  r  rx   rw   r  r    r  rx   r  c                       e Zd ZdZ	 	 d	dedej                  deej                     deej                     dej                  f
dZy)
SanaLinearAttnProcessor2_0I
    Processor for implementing scaled dot-product linear attention.
    Nrr  r   r   r   r{   c                    |j                   }||}|j                  |      }|j                  |      }|j                  |      }|j                  |j	                  |      }|j
                  |j                  |      }|j                  dd      j                  d|j                  df      }|j                  dd      j                  d|j                  df      j                  dd      }|j                  dd      j                  d|j                  df      }t        j                  |      }t        j                  |      }|j                         |j                         |j                         }}}t        j                  |ddd      }t        j                  ||      }	t        j                  |	|      }|d d d d d df   |d d d d dd f   d	z   z  }|j                  dd      j                  dd      }|j!                  |      } |j"                  d
   |      } |j"                  d   |      }|t        j$                  k(  r|j'                  dd      }|S )Nr6   r	   rG   r   rS  rT  r:   rU  rd  r       )r   rc   rd   re   r^   r_   r   r  r   rq   relur   r  r   rX  rw  r   rj   r<  clip)
rt   rr  r   r   r   r  r   r   r
  r[  s
             rw   r   z#SanaLinearAttnProcessor2_0.__call__  s	    ',, ($1!		-(ii-.		/0;;"KK&E;;"++c"C1%//DJJ3CDmmAq!++A

B/?@JJ1aP1%//DJJ3CDuffSk!KKM399;Ese\
#FeS)VU3%aCRCi0M!Q)4Lu4TU%--a3==aC%((8&A}5&A}5U]]*)..vu=Mrx   r+  r  r  rx   rw   r  r    s^     9=15,, ||,  (5	,
 !., 
,rx   r  c                       e Zd ZdZ	 	 d	dedej                  deej                     deej                     dej                  f
dZy)
 PAGCFGSanaLinearAttnProcessor2_0r   Nrr  r   r   r   r{   c                 T   |j                   }|j                  d      \  }}}t        j                  ||g      }|j	                  |      }	|j                  |      }
|j                  |      }|	j                  dd      j                  d|j                  df      }	|
j                  dd      j                  d|j                  df      j                  dd      }
|j                  dd      j                  d|j                  df      }t        j                  |	      }	t        j                  |
      }
|	j                         |
j                         |j                         }}
}	t        j                  |ddd      }t        j                  ||
      }t        j                  ||	      }|d d d d d df   |d d d d dd f   d	z   z  }|j                  dd      j                  dd      }|j!                  |      } |j"                  d
   |      } |j"                  d   |      }|j                  |      j!                  |      } |j"                  d
   |      } |j"                  d   |      }t        j                  ||g      }|t        j$                  k(  r|j'                  dd      }|S )Nr   r6   r	   rG   rS  rT  r:   rU  rd  r   r"  r#  )r   r  r   r  rc   rd   re   r   r  r   rq   r$  r   r  rX  rw  r   rj   r<  r%  )rt   rr  r   r   r   r  r  r  r  r   r   r
  r[  s                rw   r   z)PAGCFGSanaLinearAttnProcessor2_0.__call__  sj    ',,EREXEXYZE[B/1B!II';=N&OP		+,ii)*		+,1%//DJJ3CDmmAq!++A

B/?@JJ1aP1%//DJJ3CDuffSk!KKM399;Ese\
#FeS)!LL7-aCRCi8<MaQRTVTWi<X[`<`a-55a;EEaK-00@*DKKN+<=*DKKN+<= !II&78;;NK*DKKN+<=*DKKN+<=		#46G"HIU]]*)..vu=Mrx   r+  r  r  rx   rw   r'  r'    s^     9=15// ||/  (5	/
 !./ 
/rx   r'  c                       e Zd ZdZ	 	 d	dedej                  deej                     deej                     dej                  f
dZy)
%PAGIdentitySanaLinearAttnProcessor2_0r   Nrr  r   r   r   r{   c                    |j                   }|j                  d      \  }}|j                  |      }|j                  |      }	|j	                  |      }
|j                  dd      j                  d|j                  df      }|	j                  dd      j                  d|j                  df      j                  dd      }	|
j                  dd      j                  d|j                  df      }
t        j                  |      }t        j                  |	      }	|j                         |	j                         |
j                         }
}	}t        j                  |
ddd      }
t        j                  |
|	      }t        j                  ||      }|j                   t        j                  t        j                  fv r|j                         }|d d d d d df   |d d d d dd f   d	z   z  }|j!                  dd      j                  dd      }|j#                  |      } |j$                  d
   |      } |j$                  d   |      }|j	                  |      j#                  |      } |j$                  d
   |      } |j$                  d   |      }t        j&                  ||g      }|t        j                  k(  r|j)                  dd      }|S )Nr	   r6   rG   r   rS  rT  r:   rU  rd  r   r"  r#  )r   r  rc   rd   re   r   r  r   rq   r$  r   r  r   rX  r<  r=  rw  r   rj   r  r%  )rt   rr  r   r   r   r  r  r  r   r   r
  r[  s               rw   r   z.PAGIdentitySanaLinearAttnProcessor2_0.__call__O  sz    ',,/</B/B1/E,,		+,ii)*		+,1%//DJJ3CDmmAq!++A

B/?@JJ1aP1%//DJJ3CDuffSk!KKM399;Ese\
#FeS)!LL7""u}}enn&EE 1 7 7 9-aCRCi8<MaQRTVTWi<X[`<`a-55a;EEaK-00@*DKKN+<=*DKKN+<= !II&78;;NK*DKKN+<=*DKKN+<=		#46G"HIU]]*)..vu=Mrx   r+  r  r  rx   rw   r*  r*  J  s^     9=1511 ||1  (5	1
 !.1 
1rx   r*  c                       e Zd Zd Zy)FluxAttnProcessor2_0c                 <    d}t        dd|       ddlm}  ||i |S )Nzq`FluxAttnProcessor2_0` is deprecated and this will be removed in a future version. Please use `FluxAttnProcessor`r-  r  r6   FluxAttnProcessorr   transformers.transformer_fluxr0  clsr  rk  r  r0  s        rw   __new__zFluxAttnProcessor2_0.__new__  s,     R('3FGD $1&11rx   Nr   r,  r-  r5  r  rx   rw   r-  r-        2rx   r-  c                       e Zd ZdZd Zy)FluxSingleAttnProcessor2_0rD  c                 <    d}t        dd|       ddlm}  ||i |S )Nz|`FluxSingleAttnProcessor` is deprecated and will be removed in a future version. Please use `FluxAttnProcessorSDPA` instead.r9  r  r6   r/  r1  r3  s        rw   r5  z"FluxSingleAttnProcessor2_0.__new__  s,     ].9LMD $1&11rx   Nr   r,  r-  r.  r5  r  rx   rw   r9  r9    s    2rx   r9  c                       e Zd Zd Zy)FusedFluxAttnProcessor2_0c                 <    d}t        dd|       ddlm}  ||i |S )Nzv`FusedFluxAttnProcessor2_0` is deprecated and this will be removed in a future version. Please use `FluxAttnProcessor`r=  r  r6   r/  r1  r3  s        rw   r5  z!FusedFluxAttnProcessor2_0.__new__  s,     W-w8KLD $1&11rx   Nr6  r  rx   rw   r=  r=    r7  rx   r=  c                       e Zd Zd Zy)"FluxIPAdapterJointAttnProcessor2_0c                 <    d}t        dd|       ddlm}  ||i |S )Nz`FluxIPAdapterJointAttnProcessor2_0` is deprecated and this will be removed in a future version. Please use `FluxIPAdapterAttnProcessor`r@  r  r6   )FluxIPAdapterAttnProcessor)r   r2  rB  )r4  r  rk  r  rB  s        rw   r5  z*FluxIPAdapterJointAttnProcessor2_0.__new__  s,     i6ATUM)4:6::rx   Nr6  r  rx   rw   r@  r@    s    ;rx   r@  c                       e Zd Zd Zy)FluxAttnProcessor2_0_NPUc                 P    d}t        dd|d       ddlm}  |       }d|_        |S )	NzFluxAttnProcessor2_0_NPU is deprecated and will be removed in a future version. An alternative solution to use NPU Flash Attention will be provided in the future.rD  r  Fr  r6   r/  _native_npur   r2  r0  _attention_backendr4  r  rk  r  r0  r/   s         rw   r5  z FluxAttnProcessor2_0_NPU.__new__  s9    ^ 	 	,g7JZ_`D%'	'4	$rx   Nr6  r  rx   rw   rD  rD        rx   rD  c                       e Zd Zd Zy)FusedFluxAttnProcessor2_0_NPUc                 P    d}t        dd|d       ddlm}  |       }d|_        |S )	NzFusedFluxAttnProcessor2_0_NPU is deprecated and will be removed in a future version. An alternative solution to use NPU Flash Attention will be provided in the future.rL  r  Fr  r6   r/  
_fused_npurG  )rt   r  r0  r/   s       rw   r5  z%FusedFluxAttnProcessor2_0_NPU.__new__  s9    ^ 	 	17<O_deD%'	'3	$rx   Nr6  r  rx   rw   rL  rL    rJ  rx   rL  c                       e Zd ZdZd Zy)r   rH  c                     d}t        dd|d       t        dd      rt        d      t               rt        dd	      rt        d
      ddlm} t        |      dkD  s|j                  dd       d}t        dd|        ||i |}d|_        |S )NzXLAFluxFlashAttnProcessor2_0 is deprecated and will be removed in diffusers 1.0.0. An alternative solution to using XLA Flash Attention will be provided in the future.r   r  Fr  r}   r~   rJ  r   rK  r6   r/  r   rz   zpartition_spec was not used in the processor implementation when it was added. Passing it is a no-op and support for it will be removed._native_xla)	r   r   ro  r   r2  r0  r   r  rH  rI  s         rw   r5  z$XLAFluxFlashAttnProcessor2_0.__new__  s    ` 	 	0';N^cdU+VWW9-c59deeDt9q=FJJ'7>JA   &1DE%t6v6	'4	$rx   Nr;  r  rx   rw   r   r     s    rx   r   )cr   r@  typingr   r   r   r   r   r   torch.nn.functionalr   
functionalrq   image_processorr
   utilsr   r   r   utils.import_utilsr   r   r   utils.torch_utilsr   r   
get_loggerr   r   r>  r   xformers.ops$torch_xla.experimental.custom_kernelr   torch_xla.runtimer   XLA_AVAILABLEr   r   r7  rD  rf  rg  r0   r   r   r   r   r  r  r  r   r  r  r  r  r!  r   r   r   rr   r   rT  rY  rr  rw  r|  r  r  r  r   r   r   r   r\   r   r   r   r  r  r  rQ  r  r  r  r  r  r'  r*  r-  r9  r=  r@  rD  rL  r   ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORSr   r  rx   rw   <module>r`     s     9 9     4 > > d d F 
		H	%HC'H-MM Q&		 Q& Q&h		 4?3BII ?3D?
RYY ?
Df4 f4RE EPe299 eP@ @FI IXS! S!lY4 Y4xb4 b4JL4 L4^R! R!j[ [|Z! Z!z^! ^!BB4 B4JD4 D4ND DN[ [|o od[ [|q qhR RjB BJ_ _Dd dNx xvy yx] ]@g gTq299 qhobii odT TnY Yx")) :ERYY EPe ePeUXX__ ePh! h!V` `Fd dN9 9x    1 1h4 4n6 6r2 22 22 2; ;   @  	!  &  2M 2 22 2 	2
 2  2 2 2 2 "2 2 2 2 "2 2  #!2" !#2$ %2& '2( )2* +2, -2. /20  122 324 !526 728 "92: ;2< =2> )?2@ $A2B C2D E2F G2H %I2J *K2L "M2N #O2P &Q2R S2T U2V #W2X &Y2Z $[2\ ']2^ _2` a2b c2d e24 rx   