
    bi              $       .   d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZmZmZmZ d dlZddlmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZmZ dZ dZ!d	Z"d
Z#dZ$ e       xr	  ede       Z% e       Z& e       xr	  ede!      Z' ede"      Z( e       Z) e       xr	  ede#      Z* e       xr	  ede$      Z+e%r	d dl,m-Z-m.Z. ndZ-dZ.e&rd dl/m-Z0 d dl/m.Z1 ndZ0dZ1e'rd dl2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8 ndZ3dZ6dZ7dZ4dZ5dZ8e(rd dl9m:c m;c m<Z< e)rd dl=m>Z> ndZ>e*rd dl?m@ZA ndZAe+rd dlBmCZD ndZD eeE      ZFed   ZGed   ZHed   ZI G d deJe      ZK G d d      ZLe j                  eKj                  fdeeJeKf   fd       ZO	 	 	 	 	 	 dkdddej                  dej                  dej                  d eej                     d!eQd"eRd#eeQ   d$eRd%ee	eJef      deeK   d&ej                  fd'ZSd eej                     d"eRd&dfd(ZTdej                  dej                  dej                  d&dfd)ZUdej                  dej                  dej                  d&dfd*ZVd+eWd,eWd&efd-ZXdej                  dej                  dej                  d&dfd.ZYdej                  dej                  dej                  d&dfd/ZZ	 dldej                  dej                  dej                  d eej                     d&df
d0Z[deKd&dfd1Z\ ej                  d23      	 dld4eWd5eWd6eWd7eej                     fd8       Z_	 dld4eWd5eWd ej                  d7eej                     fd9Z`	 	 dmd4eWd5eWd6eWd eej                     d7eej                     d&dfd:Zad ej                  d4eWd;eWd&ej                  fd<Zbd= Zcej                  j                  d>d?d@A      dej                  dej                  dej                  d&eej                  ej                  f   fdB       Zfej                  j                  d>      dej                  dej                  dej                  d&eej                  ej                  f   fdC       ZheLj                  eKj                  eUeZe[gD      	 	 	 	 	 	 	 	 dndej                  dej                  dej                  d!eQd#eeQ   d"eRdEeeWeWf   dFeQdGeej                     dHeRdIeRd&ej                  fdJ       ZkeLj                  eKj                  eUeZe[gD      	 	 	 	 	 	 	 	 	 	 	 	 	 dodej                  dej                  dej                  dKeej                     dLeej                     dMeeW   dNeeW   d!eQd#eeQ   d"eRdEeeWeWf   dFeQdGeej                     dHeRdIeRd eej                     d&ej                  f"dO       ZmeLj                  eKj                  eUeZe[gD      	 	 	 	 	 	 dpdej                  dej                  dej                  d#eeQ   d"eRdEeeWeWf   dFeQdHeRdIeRd&ej                  fdP       ZoeLj                  eKj                  eUeZe[gD      	 	 	 	 	 	 	 	 	 	 	 dqdej                  dej                  dej                  dKeej                     dLeej                     dMeeW   dNeeW   d#eeQ   d"eRdEeeWeWf   dFeQdHeRdIeRd eej                     d&ej                  fdQ       ZqeLj                  eKj                  eTeUe[gD      	 	 	 	 	 	 drdej                  dej                  dej                  d eeej                  dRf      d"eRd#eeQ   d$eRdSeRdTee	eJef      d&ej                  fdU       ZseLj                  eKj                  eUe[gD      	 	 	 	 	 dsdej                  dej                  dej                  d eej                     d!eQd"eRd#eeQ   d$eRd&ej                  fdV       ZteLj                  eKj                  eUeZe[gD      	 	 	 	 	 dsdej                  dej                  dej                  d eej                     d!eQd"eRd#eeQ   d$eRd&ej                  fdW       ZveLj                  eKj                  eUe[gD      	 	 	 	 	 dsdej                  dej                  dej                  d eej                     d!eQd"eRd#eeQ   d$eRd&ej                  fdX       ZxeLj                  eKj                  eUeZe[gD      	 	 	 	 dtdej                  dej                  dej                  d!eQd"eRd#eeQ   d$eRd&ej                  fdY       ZzeLj                  eKj                  eUe[gD      	 	 	 	 	 dsdej                  dej                  dej                  d eej                     d!eQd"eRd#eeQ   d$eRd&ej                  fdZ       Z|eLj                  eKj                  eUeZe[gD      	 	 dudej                  dej                  dej                  d!eQd#eeQ   d&ej                  fd[       Z~eLj                  eKj                  eUe[gD      	 dvdej                  dej                  dej                  d"eRd&ej                  f
d\       ZeLj                  eKj                  eVeZe[gD      	 	 	 dwdej                  dej                  dej                  d"eRd#eeQ   dSeRd&ej                  fd]       ZeLj                  eKj                  eVeZe[gD      	 	 	 	 	 	 	 	 dxdej                  dej                  dej                  dKeej                     dLeej                     dMeeW   dNeeW   d"eRd#eeQ   d^eRd eej                     d&ej                  fd_       ZeLj                  eKj
                   eXd`d       e[gD      	 	 	 	 	 	 	 dydej                  dej                  dej                  d"eRd#eeQ   daeHdbeGd^eRdceRdSeRd&ej                  fdd       ZeLj                  eKj                   eXd`d       e[gD      	 	 	 	 	 	 dzdej                  dej                  dej                  d"eRd#eeQ   daeHdbeGd^eRdSeRd&ej                  fde       ZeLj                  eKj                   eXdfd       e[gD      	 	 	 	 	 	 	 d{dej                  dej                  dej                  d"eRd#eeQ   daeHdbeGd^eRdceRdSeRd&ej                  fdg       ZeLj                  eKj                   eXdfd       e[gD      	 	 	 	 	 d|dej                  dej                  dej                  d"eRd#eeQ   dheId^eRdSeRd&ej                  fdi       ZeLj                  eKj                  eTeUe[gD      	 	 	 	 	 dsdej                  dej                  dej                  d eej                     d!eQd"eRd#eeQ   d$eRd&ej                  fdj       Zy)}    N)Enum)AnyCallableDictListLiteralOptionalTupleUnion   )
get_loggeris_flash_attn_3_availableis_flash_attn_availableis_flash_attn_versionis_sageattention_availableis_sageattention_versionis_torch_npu_availableis_torch_versionis_torch_xla_availableis_torch_xla_versionis_xformers_availableis_xformers_version)DIFFUSERS_ATTN_BACKENDDIFFUSERS_ATTN_CHECKSz2.6.3z2.1.12.5.0z2.2z0.0.29>=)flash_attn_funcflash_attn_varlen_funcr   )r   )sageattnsageattn_qk_int8_pv_fp8_cuda!sageattn_qk_int8_pv_fp8_cuda_sm90sageattn_qk_int8_pv_fp16_cudasageattn_qk_int8_pv_fp16_tritonsageattn_varlen)npu_fusion_attention)flash_attention)fp32	fp32+fp32)
per_threadper_warp)cudatritonc                   X    e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZy)AttentionBackendNameflashflash_varlen_flash_3_flash_varlen_3flexnative_native_cudnn_native_efficient_native_flash_native_math_native_npu_native_xlasagesage_varlen_sage_qk_int8_pv_fp8_cuda_sage_qk_int8_pv_fp8_cuda_sm90_sage_qk_int8_pv_fp16_cuda_sage_qk_int8_pv_fp16_tritonxformersN)__name__
__module____qualname__FLASHFLASH_VARLEN_FLASH_3_FLASH_VARLEN_3FLEXNATIVE_NATIVE_CUDNN_NATIVE_EFFICIENT_NATIVE_FLASH_NATIVE_MATH_NATIVE_NPU_NATIVE_XLASAGESAGE_VARLEN_SAGE_QK_INT8_PV_FP8_CUDA_SAGE_QK_INT8_PV_FP8_CUDA_SM90_SAGE_QK_INT8_PV_FP16_CUDA_SAGE_QK_INT8_PV_FP16_TRITONXFORMERS     ^/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/attention_dispatch.pyr/   r/      sn     E!LH'O DF#M+#M!LKK DK ;%E"!=#A  HrZ   r/   c                   t    e Zd Zi Zi Zi Z ee      Ze	Z
eddedeee      fd       Zed        Zed        Zy)_AttentionBackendRegistryNbackendconstraintsc                 P     t         j                  d d         fd}|S )NzRegistering attention backend: z with constraints: c                     | j                   <   xs g j                  <   t        t        j                  |       j
                  j                               j                  <   | S N)	_backends_constraintssetinspect	signature
parameterskeys_supported_arg_names)funcr^   clsr_   s    r[   	decoratorz5_AttentionBackendRegistry.register.<locals>.decorator   sZ    %)CMM'"(3(9rCW%03G4E4Ed4K4V4V4[4[4]0^C$$W-KrZ   )loggerdebug)rl   r^   r_   rm   s   ``` r[   registerz"_AttentionBackendRegistry.register   s-    6wi?RS^R_`a	 rZ   c                 L    | j                   | j                  | j                      fS rb   )_active_backendrc   rl   s    r[   get_active_backendz,_AttentionBackendRegistry.get_active_backend   s"    ""CMM#2E2E$FFFrZ   c                 H    t        | j                  j                               S rb   )listrc   ri   rs   s    r[   list_backendsz'_AttentionBackendRegistry.list_backends   s    CMM&&())rZ   rb   )rC   rD   rE   rc   rd   rj   r/   r   rr   r   _checks_enabledclassmethodr	   r   r   rp   rt   rw   rY   rZ   r[   r]   r]      sw    IL*+ABO+O	3 	(4PX>BZ 	 	 G G * *rZ   r]   r^   c              #      K   | t         j                  vrt        d|  d      t        |       } t	        |        t         j
                  }| t         _        	 d |t         _        y# |t         _        w xY ww)z>
    Context manager to set the active attention backend.
    zBackend z is not registered.N)r]   rc   
ValueErrorr/   %_check_attention_backend_requirementsrr   )r^   old_backends     r[   attention_backendr~      sm     
 /9998G9,?@AA"7+G)'2+;;K07-@4?!1K!1s   AA6A& A6&A33A6r^   querykeyvalue	attn_mask	dropout_p	is_causalscale
enable_gqaattention_kwargsreturnc	                b   |xs i }|	t         j                         \  }
}n*t        |	      }
t         j                  j	                  |
      }| ||||||d|}t        dd      r||d<   t         j                  rrt        |      t        t         j                  |
         z
  }|rt        j                  d|
 d| d       t         j                  j	                  |
      D ]
  } |di |  |j                         D ci c]  \  }}|t         j                  |
   v s||  }}} |di |S c c}}w )	N)r   r   r   r   r   r   r   r   r   r   z5Removing unsupported arguments for attention backend z: .rY   )r]   rt   r/   rc   getr   rx   re   rj   rn   warningrd   items)r   r   r   r   r   r   r   r   r   r^   backend_name
backend_fnkwargsremoved_kwargscheckkvs                    r[   dispatch_attention_fnr      sI    (-2 $=#O#O#Q j+G4.88<<\J
 	 	F g&)| 00Vs+D+Y+YZf+g'hhNNRS_R``bcqbrrstu.;;??M 	EOFO	  &||~stq!6O6d6deq6r1radsFs ts   =D+D+c                 $    | |rt        d      y y )Nz8`is_causal` cannot be True when `attn_mask` is not None.)r{   )r   r   r   s      r[   _check_attn_mask_or_causalr     s    STT "+rZ   c                     | j                   |j                   k7  s| j                   |j                   k7  rt        d      | j                  |j                  k7  s| j                  |j                  k7  rt        d      y )Nz1Query, key, and value must be on the same device.z/Query, key, and value must have the same dtype.)devicer{   dtyper   r   r   r   s       r[   _check_devicer   	  s]    ||szz!U\\U\\%ALMM{{cii5;;%++#=JKK $>rZ   c                 f    t        | ||       | j                  j                  dk7  rt        d      y )Nr,   z/Query, key, and value must be on a CUDA device.)r   r   typer{   r   s       r[   _check_device_cudar     s1    %e$||F"JKK #rZ   majorminorc                 |     dt         j                  dt         j                  dt         j                  dd f fd}|S )Nr   r   r   r   c                     t        | ||       t        j                  j                  | j                        fk  rt        d d d      y )NzJQuery, key, and value must be on a CUDA device with compute capability >= r   )r   torchr,   get_device_capabilityr   r{   )r   r   r   r   r   r   s       r[   check_device_cudaz:_check_device_cuda_atleast_smXY.<locals>.check_device_cuda  sY    5#u-::++ELL9UENJ\]b\ccdejdkklm  KrZ   )r   Tensor)r   r   r   s   `` r[   _check_device_cuda_atleast_smXYr     s9     ELL  dh  rZ   c                     | j                   |j                   k7  rt        d      | j                   |j                   k7  rt        d      y )Nz'Query and key must have the same dtype.z)Query and value must have the same dtype.)r   r{   r   s       r[   _check_qkv_dtype_matchr   !  s?    {{ciiBCC{{ekk!DEE "rZ   c                     t        | ||       | j                  t        j                  t        j                  fvrt        d      y )Nz9Query, key, and value must be either bfloat16 or float16.)r   r   r   bfloat16float16r{   r   s       r[   _check_qkv_dtype_bf16_or_fp16r   (  s8    5#u-{{5>>5==99TUU :rZ   c                    | j                   d   |j                   d   k7  rt        d      | j                   d   |j                   d   k7  rt        d      |+|j                   d   |j                   d   k7  rt        d      y y )Nz0Query and key must have the same last dimension.z<Query and value must have the same second to last dimension.z=Attention mask must match the key's second to last dimension.)shaper{   )r   r   r   r   r   s        r[   _check_shaper   .  s     {{2#))B-'KLL{{2%++b/)WXX!4		"!EXYY "FrZ   c                    | t         j                  t         j                  fv r't        s t	        d| j
                   dt         d      y | t         j                  t         j                  fv r t        st	        d| j
                   d      y | t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  fv r't         s t	        d| j
                   dt"         d      y | t         j$                  k(  r t&        st	        d| j
                   d	      y | t         j(                  k(  r t*        st	        d
| j
                   d      y | t         j,                  k(  r't.        s t	        d| j
                   dt0         d      y | t         j2                  k(  r't4        s t	        d| j
                   dt6         d      y y )NzFlash Attention backend 'zb' is not usable because of missing package or the version is too old. Please install `flash-attn>=z`.zFlash Attention 3 backend 'zp' is not usable because of missing package or the version is too old. Please build FA3 beta release from source.zSage Attention backend 'ze' is not usable because of missing package or the version is too old. Please install `sageattention>=zFlex Attention backend 'zd' is not usable because of missing package or the version is too old. Please install `torch>=2.5.0`.zNPU Attention backend 'za' is not usable because of missing package or the version is too old. Please install `torch_npu`.zXLA Attention backend 'za' is not usable because of missing package or the version is too old. Please install `torch_xla>=zXformers Attention backend 'z`' is not usable because of missing package or the version is too old. Please install `xformers>=)r/   rF   rG   _CAN_USE_FLASH_ATTNRuntimeErrorr   _REQUIRED_FLASH_VERSIONrH   rI   _CAN_USE_FLASH_ATTN_3rR   rS   rT   rU   rV   rW   _CAN_USE_SAGE_ATTN_REQUIRED_SAGE_VERSIONrJ   _CAN_USE_FLEX_ATTNrP   _CAN_USE_NPU_ATTNrQ   _CAN_USE_XLA_ATTN_REQUIRED_XLA_VERSIONrX   _CAN_USE_XFORMERS_ATTN_REQUIRED_XFORMERS_VERSIONr   s    r[   r|   r|   @  sR   '--/C/P/PQQ"+GMM?  ;]  ^u  ]v  vx  y  #
 
)224H4X4XY	Y$-gmm_  =m  n  %
 
!!((66;;7799 
 "*7==/  :_  `v  _w  wy  z  "
 
(--	-!*7==/  :^  _  "
 
(44	4 )'--  9Z  [  !
 
(44	4 )'--  9Z  [p  Zq  qs  t  !
 
(11	1%.w}}o  >^  _y  ^z  z|  }  & 
2rZ      )maxsize
batch_size	seq_len_q
seq_len_kvr   c                 <   t        j                  | f|t         j                  |      }t        j                  | f|t         j                  |      }t        j                  | dz   t         j                  |      }t        j                  | dz   t         j                  |      }t        j                  |d      |dd  t        j                  |d      |dd  |j                         j                         }|j                         j                         }	||f||f||	ffS )Nr   r      r   dim)r   fullint32zeroscumsummaxitem)
r   r   r   r   	seqlens_q	seqlens_kcu_seqlens_qcu_seqlens_kmax_seqlen_qmax_seqlen_ks
             r[   3_prepare_for_flash_attn_or_sage_varlen_without_maskr   s  s     

J=)5;;vVI

J=*EKKPVWI;;zA~U[[PL;;zA~U[[PL||I15L||I15L==?'')L==?'')Ly!L,#?,P\A]]]rZ   c                 .   t        j                  | f|t         j                  |      }|j                  dt         j                        }t        j                  | dz   t         j                  |      }t        j                  | dz   t         j                  |      }t        j
                  |d      |dd  t        j
                  |d      |dd  |j                         j                         }|j                         j                         }	||f||f||	ffS )Nr   r   )r   r   r   r   )r   r   r   sumr   r   r   r   )
r   r   r   r   r   r   r   r   r   r   s
             r[   0_prepare_for_flash_attn_or_sage_varlen_with_maskr     s     

J=)5;;vVI!5;;7I;;zA~U[[PL;;zA~U[[PL||I15L||I15L==?'')L==?'')Ly!L,#?,P\A]]]rZ   c                 >    |t        | |||      S t        | |||      S rb   )r   r   )r   r   r   r   r   s        r[   &_prepare_for_flash_attn_or_sage_varlenr     s0     B:yZdflmm;J	S\^deerZ   	seq_len_kc           	         | j                   t        j                  k7  rt        d| j                    d      | j                  dk(  r#| j                  d      j                  ||      } n@| j                  dk(  rG| j                  d      d|fvrt        d| j                  d    d| d      | j                  ||      } n| j                  d	k(  rY| j                  d      d|fvrt        d| j                  d    d| d
      | j                  d      } | j                  ||      } n| j                  dk(  r[| j                  d      d|fvrt        d| j                  d    d| d      | j                  |dd|      } | j                  d      } nt        d| j                         | j                  ||fk7  rt        d| j                   d| d| d      | S )z
    Normalize an attention mask to shape [batch_size, seq_len_k] (bool) suitable for inferring seqlens_[q|k] in
    FlashAttention/Sage varlen.

    Supports 1D to 4D shapes and common broadcasting patterns.
    z)Attention mask must be of type bool, got r   r   r   r   zattn_mask.shape[0] (z) must be 1 or z for 2D attention mask.   z for 3D attention mask.r      z for 4D attention mask.r   )r   r   z"Unsupported attention mask shape: z.Normalized attention mask shape mismatch: got z, expected (z, ))
r   r   boolr{   ndim	unsqueezeexpandsizer   any)r   r   r   s      r[   _normalize_attn_maskr     s    %**$DY__DUUVWXX~~''*11*iH		1	>>!Q
O3&yq'9&:/*Ulm  $$Z;		1	 >>!Q
O3&yq'9&:/*Ulm  MMaM(	$$Z;		1	>>!Q
O3&yq'9&:/*Ulm  $$ZRC	MMfM-	 =ioo=NOPP:y11<Y__<M\ZdYeeghqgrrst
 	
 rZ   c                     ||k\  S rb   rY   )	batch_idxhead_idxq_idxkv_idxs       r[   _flex_attention_causal_mask_modr     s    F?rZ   z!flash_attn_3::_flash_attn_forwardrY   r,   )mutates_argsdevice_typesc                 P    t        | ||      \  }}|j                  ddd      }||fS )Nr   r   r   )flash_attn_3_funcpermute)r   r   r   outlses        r[   _wrapped_flash_attn_3_originalr     s1     !U3HC
++aA
C8OrZ   c                 z    | j                   \  }}}}|||f}t        j                  |       | j                  |      fS rb   )r   r   
empty_like	new_empty)r   r   r   r   seq_len	num_headshead_dim	lse_shapes           r[   _r     s?    /4{{,JHWi0IE"EOOI$>>>rZ   )r_   window_sizesoftcapalibi_slopesdeterministicreturn_attn_probsc                 2    t        | |||||||||	|
      }|S )N)qr   r   r   softmax_scalecausalr   r  r  r  r  r   )r   r   r   r   r   r   r   r  r  r  r  r   s               r[   _flash_attentionr	    s6    " 


!#+C JrZ   r   r   r   r   c                 b   | j                   \  }}}}|j                   \  }}}}|t        |||      }t        d ||||fD              r(t        ||||| j                        \  \  }}\  }}\  }}nt        j                  |f|t
        j                  | j                        }|j                  t
        j                  | j                        }|j                  t
        j                  | j                        }g g }}t        |      D ]7  }||   }|j                  ||d |f          |j                  ||d |f          9 | j                  dd      }t        j                  |d      }t        j                  |d      }t        ||||||||||	|
||||      }|j                  d|df      }|S )	Nc              3   $   K   | ]  }|d u  
 y wrb   rY   .0xs     r[   	<genexpr>z*_flash_varlen_attention.<locals>.<genexpr>1       
W19
W   r   r   r   r   r   r   )r  r   r   r   r   r   r   r   r  r  r   r  r  r  r  r   )r   r   r   r   r   r   r   r   torangeappendflattencatr   	unflatten)r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r   r   r   r   r   r   	key_validvalue_validb	valid_lenquery_packed
key_packedvalue_packedr   s                                r[   _flash_varlen_attentionr     s   , #(++J	1a))Az1a(J
K	

W|\<V
WW2IzYu|| 	SI4|6R|\ JJ
}l%++V[VbVbc	#U[[N#U[[N{I: 1aL	Q

]+,5JYJ/01
 ==A&L9!,J99[a0L
 


!!!!!#+C" --J+
,CJrZ   c	                     t        di d| d|d|d|d|dd dd dd d	d d
|ddd|dddd d|dd^}	}
}|r|	|
fS |	S )Nr  r   r   r  r  qv	q_descale	k_descale	v_descaler   attention_chunkr   r  
num_splitsr   pack_gqar  	sm_marginrY   )r   )r   r   r   r   r   r   r  r  r  r   r   r   s               r[   _flash_attention_3r*  \  s     % 

  	
            $  !LCq$ +C:33rZ   c                    | j                   \  }}}}|j                   \  }}}}|t        |||      }t        d ||||fD              r(t        ||||| j                        \  \  }}\  }}\  }}nt        j                  |f|t
        j                  | j                        }|j                  t
        j                  | j                        }|j                  t
        j                  | j                        }g g }}t        |      D ]7  }||   }|j                  ||d |f          |j                  ||d |f          9 | j                  dd      }t        j                  |d      }t        j                  |d      }t        di d|d|d	|d
|d|d|d|dd dd d|d|dd dd dd dd d|	d|
dddd d|dd^}}}|j                  d|df      }|r||fS |S )Nc              3   $   K   | ]  }|d u  
 y wrb   rY   r  s     r[   r  z,_flash_varlen_attention_3.<locals>.<genexpr>  r  r  r  r   r   r   r   r  r   r   r   r   r   r   	seqused_q	seqused_kr  r  r"  r#  r$  r%  r   r  r'  r(  r  r)  r   rY   )r   r   r   r   r   r   r   r   r  r  r  r  r  flash_attn_3_varlen_funcr  )r   r   r   r   r   r   r   r   r   r   r  r  r  r   r   r   r   r   r   r  r  r  r  r  r  r  r   r   s                               r[   _flash_varlen_attention_3r0    so   ( #(++J	1a))Az1a(J
K	

W|\<V
WW2IzYu|| 	SI4|6R|\ JJ
}l%++V[VbVbc	#U[[N#U[[N{I: 1aL	Q

]+,5JYJ/01
 ==A&L9!,J99[a0L+ 

  "	
 " " "           !" #$ %& '( $)* +LCq. --J+
,C*C:33rZ   zflex_attention.BlockMask
return_lsekernel_optionsc	                    d }	d }
| j                   \  }}}}|j                   \  }}}}t        t        j                        r}
n|r)t        j                  t
        ||||| j                        }
nt        j                        rj                  dk(  r2j                  j                  d      dj                  d      d      j                  ||||      j                  t        j                  k(  r*fd}t        j                  ||d ||| j                        }
nfd}	nt        d      d | ||fD        \  } }}t        j                  | |||	|
||||	      }|j!                  dddd	      }|S )
Nr   r   r   c                     | |||f   S rb   rY   )r   r   r   r   r   s       r[   mask_modz(_native_flex_attention.<locals>.mask_mod  s     HeV!CDDrZ   c                     | ||||f   z   S rb   rY   )scorer   r   r   r   r   s        r[   	score_modz)_native_flex_attention.<locals>.score_mod  s    yHeV)KLLLrZ   zCAttention mask must be either None, a BlockMask, or a 2D/4D tensor.c              3   D   K   | ]  }|j                  d ddd        ywr   r   r   r   Nr   r  s     r[   r  z)_native_flex_attention.<locals>.<genexpr>       L11aA.L    )	r   r   r   r8  
block_maskr   r   r1  r2  r   )r   
isinstanceflex_attention	BlockMaskcreate_block_maskr   r   r   	is_tensorr   viewr   r   r   r   r{   r   )r   r   r   r   r   r   r   r1  r2  r8  r>  r   r   r   r   r   r5  r   s      `              r[   _native_flex_attentionrE    sv     IJ*/++'J	9a))Az1aJy.2J2JK
	#55+ZIz[`[g[g

 
	#>>Q!y~~a'8!Y^^A=NPQRI$$ZIzR	??ejj(E (99*dIz5<<J
M ^__LU8KLE3

'
'%
C ++aAq
!CJrZ   c           
          d | ||fD        \  } }}t         j                  j                  j                  | |||||||      }|j	                  dddd      }|S )Nc              3   D   K   | ]  }|j                  d ddd        ywr:  r;  r  s     r[   r  z$_native_attention.<locals>.<genexpr>  r<  r=  r   r   r   r   r   r   r   r   r   r   r   r   )r   nn
functionalscaled_dot_product_attentionr   	r   r   r   r   r   r   r   r   r   s	            r[   _native_attentionrM    so     MU8KLE3
((


:
: ; 	C ++aAq
!CJrZ   c                    d | ||fD        \  } }}t         j                  j                  j                  t         j                  j                  j                  j
                        5  t         j                  j                  j                  | |||||||      }d d d        j                  dddd      }|S # 1 sw Y   xY w)Nc              3   D   K   | ]  }|j                  d ddd        ywr:  r;  r  s     r[   r  z*_native_cudnn_attention.<locals>.<genexpr>6  r<  r=  rH  r   r   r   r   )	r   rI  	attentionsdpa_kernel
SDPBackendCUDNN_ATTENTIONrJ  rK  r   rL  s	            r[   _native_cudnn_attentionrT  (  s     MU8KLE3				'	'(:(:(E(E(U(U	V 

hh!!>>! ? 	


 ++aAq
!CJ

 

   &2B66B?c                    d | ||fD        \  } }}t         j                  j                  j                  t         j                  j                  j                  j
                        5  t         j                  j                  j                  | |||||||      }d d d        j                  dddd      }|S # 1 sw Y   xY w)Nc              3   D   K   | ]  }|j                  d ddd        ywr:  r;  r  s     r[   r  z._native_efficient_attention.<locals>.<genexpr>T  r<  r=  rH  r   r   r   r   )	r   rI  rP  rQ  rR  EFFICIENT_ATTENTIONrJ  rK  r   rL  s	            r[   _native_efficient_attentionrY  F  s     MU8KLE3				'	'(:(:(E(E(Y(Y	Z 

hh!!>>! ? 	


 ++aAq
!CJ

 

rU  c                    d | ||fD        \  } }}t         j                  j                  j                  t         j                  j                  j                  j
                        5  t         j                  j                  j                  | ||d ||||      }d d d        j                  dddd      }|S # 1 sw Y   xY w)Nc              3   D   K   | ]  }|j                  d ddd        ywr:  r;  r  s     r[   r  z*_native_flash_attention.<locals>.<genexpr>q  r<  r=  rH  r   r   r   r   )	r   rI  rP  rQ  rR  FLASH_ATTENTIONrJ  rK  r   )r   r   r   r   r   r   r   r   s           r[   _native_flash_attentionr]  d  s     MU8KLE3				'	'(:(:(E(E(U(U	V 

hh!!>>! ? 	


 ++aAq
!CJ

 

rU  c                    d | ||fD        \  } }}t         j                  j                  j                  t         j                  j                  j                  j
                        5  t         j                  j                  j                  | |||||||      }d d d        j                  dddd      }|S # 1 sw Y   xY w)Nc              3   D   K   | ]  }|j                  d ddd        ywr:  r;  r  s     r[   r  z)_native_math_attention.<locals>.<genexpr>  r<  r=  rH  r   r   r   r   )	r   rI  rP  rQ  rR  MATHrJ  rK  r   rL  s	            r[   _native_math_attentionra    s     MU8KLE3				'	'(:(:(E(E(J(J	K 

hh!!>>! ? 	


 ++aAq
!CJ

 

rU  c                     t        | ||| j                  d      dd |%dt        j                  | j                  d         z  n|ddd|z
  dd      d   S )	Nr   BSNDg      ?r   i   Fr   )input_layoutpser   pre_tockensnext_tockens	keep_probsyncinner_precise)r&   r   mathsqrtr   )r   r   r   r   r   s        r[   _native_npu_attentionrm    sg      

127-cDIIekk"o..U	/ 	 	rZ   c                     d | ||fD        \  } }}| t        j                  | j                  d         z  } t        | |||      }|j	                  dddd      }|S )Nc              3   D   K   | ]  }|j                  d ddd        ywr:  r;  r  s     r[   r  z(_native_xla_attention.<locals>.<genexpr>  r<  r=  r   )r  r   r   r  r   r   r   r   )rk  rl  r   xla_flash_attentionr   )r   r   r   r   r   s        r[   _native_xla_attentionrq    si     MU8KLE3DIIekk"o..E



	C ++aAq
!CJrZ   c           	      &    t        | ||d|||      S )NNHD)r  r   r   tensor_layoutr   sm_scaler1  )r    )r   r   r   r   r   r1  s         r[   _sage_attentionrv    s&     


 rZ   smooth_kc                 X   | j                   \  }}}}|j                   \  }}}}|
t        |
||      }
t        d ||||fD              r(t        ||||
| j                        \  \  }}\  }}\  }}nt        j                  |f|t
        j                  | j                        }|j                  t
        j                  | j                        }|j                  t
        j                  | j                        }g g }}t        |      D ]7  }||   }|j                  ||d |f          |j                  ||d |f          9 | j                  dd      }t        j                  |d      }t        j                  |d      }t        ||||||||||	
      }|j                  d|df      }|S )	Nc              3   $   K   | ]  }|d u  
 y wrb   rY   r  s     r[   r  z)_sage_varlen_attention.<locals>.<genexpr>  r  r  r  r   r   r   r   )
r  r   r   r   r   r   r   r   ru  rw  r   )r   r   r   r   r   r   r   r   r  r  r  r  r  r%   r  )r   r   r   r   r   r   r   r   r   rw  r   r   r   r   r   r   r  r  r  r  r  r  r  r   s                           r[   _sage_varlen_attentionrz    s   " #(++J	1a))Az1a(J
K	

W|\<V
WW2IzYu|| 	SI4|6R|\ JJ
}l%++V[VbVbc	#U[[N#U[[N{I: 1aL	Q

]+,5JYJ/01
 ==A&L9!,J99[a0L



!!!!C --J+
,CJrZ   	   qk_quant_granpv_accum_dtypesmooth_vc
                 .    t        | ||d|||||||	      S Nrs  )r  r   r   rt  r   r|  ru  r}  rw  r~  r1  )r!   
r   r   r   r   r   r|  r}  rw  r~  r1  s
             r[   #_sage_qk_int8_pv_fp8_cuda_attentionr  %  s2      (


#% rZ   c	                 ,    t        | ||d||||||
      S )Nrs  )
r  r   r   rt  r   r|  ru  r}  rw  r1  )r"   )	r   r   r   r   r   r|  r}  rw  r1  s	            r[   (_sage_qk_int8_pv_fp8_cuda_sm90_attentionr  D  s/     -


#% rZ      c
                 .    t        | ||d|||||||	      S r  )r#   r  s
             r[   $_sage_qk_int8_pv_fp16_cuda_attentionr  a  s2      )


#% rZ   quantization_backendc                 *    t        | ||d|||||	      S )Nrs  )	r  r   r   rt  r  r   ru  rw  r1  )r$   )r   r   r   r   r   r  rw  r1  s           r[   &_sage_qk_int8_pv_fp16_triton_attentionr    s,     +


1
 
rZ   c                    | j                   \  }}	}
}|j                   \  }}}}|rt        j                         }n||j                  dk(  r3|j	                  |j                  d      d|j                  d      d      }n|j                  dk7  rt        d      |j                  ||
|	|      j                  |       }|rz|
|z  dk7  rt        d      |
|z  }| j                  d|df      } |j                  d|df      j                  ddd|d      }|j                  d|df      j                  ddd|d      }t        j                  | |||||      }|r|j                  dd      }|S )	Nr   r   r   r   zDOnly 2D and 4D attention masks are supported for xformers attention.zKNumber of heads in query must be divisible by number of heads in key/value.r   r   )r   xopsLowerTriangularMaskr   rD  r   r{   r   type_asr  memory_efficient_attentionr  )r   r   r   r   r   r   r   r   r   r   num_heads_qr   r   num_heads_kvnum_heads_per_groupr   s                   r[   _xformers_attentionr    so    -2KK)J	;%(YY"Az<,,.			>>Q!y~~a'8!Y^^A=NPQRI^^q cdd$$ZiT\\]bc	%*jkk)\9L"#56mmAb1299"b"FY[]^L"#56==b"bJ]_ab

)
)%eY	SX
YCkk!QJrZ   )N        FNFNrb   )NN)r  NFr   r   r  NFF)NNNNr  NFr  r  NFFN)NFr  r  FF)NNNNNFr  r  FFN)NFNFFN)Nr  FNF)r  FNF)r  N)F)FNF)NNNNFNTN)FNr*   r)   TFF)FNr*   r)   TF)FNr*   r(   TFF)FNr-   TF)
contextlib	functoolsrf   rk  enumr   typingr   r   r   r   r   r	   r
   r   r   utilsr   r   r   r   r   r   r   r   r   r   r   r   utils.constantsr   r   r   r   _REQUIRED_FLEX_VERSIONr   r   r   r   r   r   r   r   r   
flash_attnr   r   flash_attn_interfacer   r/  sageattentionr    r!   r"   r#   r$   r%   !torch.nn.attention.flex_attentionrI  rP  r@  	torch_npur&   $torch_xla.experimental.custom_kernelr'   rp  xformers.opsopsr  rC   rn   _SAGE_ATTENTION_PV_ACCUM_DTYPE_SAGE_ATTENTION_QK_QUANT_GRAN$_SAGE_ATTENTION_QUANTIZATION_BACKENDstrr/   r]   contextmanagerrK   r~   r   floatr   r   r   r   r   intr   r   r   r   r|   	lru_cacher   r   r   r   r   r   library	custom_opr   register_faker   rp   rF   r	  rG   r   rH   r*  rI   r0  rJ   rE  rM  rL   rT  rM   rY  rN   r]  rO   ra  rP   rm  rQ   rq  rR   rv  rS   rz  rT   r  rU   r  rV   r  rW   r  rX   r  rY   rZ   r[   <module>r     s[        M M M     L "      % -/h4I$Pg4h 13 /1l6NtUk6l %d,BC *, *,b1EdLa1b .0j5HOi5j  BBO! IW#    H$(!&*##' (,%O  ?> . [ D 
H	 "))<!=  '(@ A './?'@ $3 D* *8 BVB]B] @uS*>%>? @ @. )-!15,  /3, <<, 	,  <<,  %	, 
 ,  ,  E?,  ,  tCH~.,  *+,  \\, fU(5<<*@ UT U`d U
L LELL L L\` LLell L Lell Lae L3 s x F%,, FU\\ F%,, Fei FV VELL VQVQ]Q] Vlp V )-	Z<<Z	Z <<Z %	Z 
Z$03G 0D 0f S!
 &*	^^^ ^ U\\"	^ "^* &*	^^^ ||^ U\\"	^* )-%)	f	f	f 	f %		f
 U\\"	f 
	f1ELL 1c 1c 1V[VbVb 1h <2\bc<<#ll38<<
5<<%& d @A?U\\ ? ?U\\ ?eELLZ_ZfZfLfFg ? B? ## =|L $  !#++/#<<	 << 	
 E?  sCx  5<<(   \\	: ##%% =|L $  ,0+/"&"&!#++/#(,!@<<@	@ <<@ 5<<(	@
 5<<(@ 3-@ 3-@ @ E?@ @ sCx@ @ 5<<(@ @ @  %!@" \\#@	@F ##!! =|L $  "#+#4<<4	4 <<4 E?	4
 4 sCx4 4 4 4 \\4	4@ ##(( =|L $  ,0+/"&"&!#+#(,D4<<D4	D4 <<D4 5<<(	D4
 5<<(D4 3-D4 3-D4 E?D4 D4 sCxD4 D4 D4 D4 %D4 \\D4	D4N ##+]LI $  LP!/39<<9	9 <<9 ell,FFGH	9
 9 E?9 9 9 T#s(^,9 \\9	9x ##- $  )-!<<	 << %	
   E?  \\	2 ##&& =|L $  )-!<<	 << %	
   E?  \\	4 ##**- $  )-!<<	 << %	
   E?  \\	4 ##&& =|L $  !<<	 << 	
  E?  \\	2 ##%%- $  )-!<<	 << %	
   E?  \\	4 ##$$ =|L $  !	<<			 <<	 		
 E?	 \\			0 ##$$- $  	<<	 << 	
 \\	$ ###%BLQ $  !<<	 << 	
 E?  \\	& ##$$#%BLQ $  ,0+/"&"&!(,6<<6	6 <<6 5<<(	6
 5<<(6 3-6 3-6 6 E?6 6 %6 \\6	6r ##220A6E $  !3?5@<<	 << 	
 E? 1 3    \\	6 ##770A6E $  !3?5@<<	 << 	
 E? 1 3   \\	2 ##330A6E $  !3?5;<<	 << 	
 E? 1 3    \\	6 ##550A6E $  !AI<<	 << 	
 E? ?   \\	. ##!!+]LI $  )-!#<<#	# <<# %	#
 # # E?# # \\#	#rZ   