
import re
from dataclasses import dataclass
from typing import Any, Callable, Optional, Tuple, Union

import torch

from ..models.attention import AttentionModuleMixin
from ..models.attention_processor import Attention, MochiAttention
from ..utils import logging
from ._common import (
    _ATTENTION_CLASSES,
    _CROSS_TRANSFORMER_BLOCK_IDENTIFIERS,
    _SPATIAL_TRANSFORMER_BLOCK_IDENTIFIERS,
    _TEMPORAL_TRANSFORMER_BLOCK_IDENTIFIERS,
)
from .hooks import HookRegistry, ModelHook


logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

_PYRAMID_ATTENTION_BROADCAST_HOOK = "pyramid_attention_broadcast"


@dataclass
class PyramidAttentionBroadcastConfig:
    r"""
    Configuration for Pyramid Attention Broadcast.

    Args:
        spatial_attention_block_skip_range (`int`, *optional*, defaults to `None`):
            The number of times a specific spatial attention broadcast is skipped before computing the attention states
            to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e.,
            old attention states will be re-used) before computing the new attention states again.
        temporal_attention_block_skip_range (`int`, *optional*, defaults to `None`):
            The number of times a specific temporal attention broadcast is skipped before computing the attention
            states to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times
            (i.e., old attention states will be re-used) before computing the new attention states again.
        cross_attention_block_skip_range (`int`, *optional*, defaults to `None`):
            The number of times a specific cross-attention broadcast is skipped before computing the attention states
            to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e.,
            old attention states will be re-used) before computing the new attention states again.
        spatial_attention_timestep_skip_range (`Tuple[int, int]`, defaults to `(100, 800)`):
            The range of timesteps to skip in the spatial attention layer. The attention computations will be
            conditionally skipped if the current timestep is within the specified range.
        temporal_attention_timestep_skip_range (`Tuple[int, int]`, defaults to `(100, 800)`):
            The range of timesteps to skip in the temporal attention layer. The attention computations will be
            conditionally skipped if the current timestep is within the specified range.
        cross_attention_timestep_skip_range (`Tuple[int, int]`, defaults to `(100, 800)`):
            The range of timesteps to skip in the cross-attention layer. The attention computations will be
            conditionally skipped if the current timestep is within the specified range.
        spatial_attention_block_identifiers (`Tuple[str, ...]`):
            The identifiers to match against the layer names to determine if the layer is a spatial attention layer.
        temporal_attention_block_identifiers (`Tuple[str, ...]`):
            The identifiers to match against the layer names to determine if the layer is a temporal attention layer.
        cross_attention_block_identifiers (`Tuple[str, ...]`):
            The identifiers to match against the layer names to determine if the layer is a cross-attention layer.
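
    Example (illustrative sketch; the skip values are arbitrary and `pipe.current_timestep` is assumed to exist on
    the host pipeline, as in the `apply_pyramid_attention_broadcast` example below):

    ```python
    >>> # With `spatial_attention_block_skip_range=2`, a matched spatial attention block recomputes its states on
    >>> # every second call (iterations 0, 2, 4, ...) and re-uses (broadcasts) the cached states in between, but
    >>> # only while the current timestep lies inside `spatial_attention_timestep_skip_range`.
    >>> config = PyramidAttentionBroadcastConfig(
    ...     spatial_attention_block_skip_range=2,
    ...     spatial_attention_timestep_skip_range=(100, 800),
    ...     current_timestep_callback=lambda: pipe.current_timestep,
    ... )
    ```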
    """

    spatial_attention_block_skip_range: Optional[int] = None
    temporal_attention_block_skip_range: Optional[int] = None
    cross_attention_block_skip_range: Optional[int] = None

    spatial_attention_timestep_skip_range: Tuple[int, int] = (100, 800)
    temporal_attention_timestep_skip_range: Tuple[int, int] = (100, 800)
    cross_attention_timestep_skip_range: Tuple[int, int] = (100, 800)

    spatial_attention_block_identifiers: Tuple[str, ...] = _SPATIAL_TRANSFORMER_BLOCK_IDENTIFIERS
    temporal_attention_block_identifiers: Tuple[str, ...] = _TEMPORAL_TRANSFORMER_BLOCK_IDENTIFIERS
    cross_attention_block_identifiers: Tuple[str, ...] = _CROSS_TRANSFORMER_BLOCK_IDENTIFIERS

    current_timestep_callback: Callable[[], int] = None

    def __repr__(self) -> str:
        return (
            "PyramidAttentionBroadcastConfig("
            f"\n  spatial_attention_block_skip_range={self.spatial_attention_block_skip_range},"
            f"\n  temporal_attention_block_skip_range={self.temporal_attention_block_skip_range},"
            f"\n  cross_attention_block_skip_range={self.cross_attention_block_skip_range},"
            f"\n  spatial_attention_timestep_skip_range={self.spatial_attention_timestep_skip_range},"
            f"\n  temporal_attention_timestep_skip_range={self.temporal_attention_timestep_skip_range},"
            f"\n  cross_attention_timestep_skip_range={self.cross_attention_timestep_skip_range},"
            f"\n  spatial_attention_block_identifiers={self.spatial_attention_block_identifiers},"
            f"\n  temporal_attention_block_identifiers={self.temporal_attention_block_identifiers},"
            f"\n  cross_attention_block_identifiers={self.cross_attention_block_identifiers},"
            f"\n  current_timestep_callback={self.current_timestep_callback}"
            "\n)"
        )


class PyramidAttentionBroadcastState:
    r"""
    State for Pyramid Attention Broadcast.

    Attributes:
        iteration (`int`):
            The current iteration of the Pyramid Attention Broadcast. It is necessary to ensure that `reset_state` is
            called before starting a new inference forward pass for PAB to work correctly.
        cache (`Any`):
            The cached output from the previous forward pass. This is used to re-use the attention states when the
            attention computation is skipped. It is either a tensor or a tuple of tensors, depending on the module.
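
    Example (conceptual sketch of how the hook drives this state; the string cache value is a stand-in for the
    tensor(s) stored in practice):

    ```python
    >>> state = PyramidAttentionBroadcastState()
    >>> (state.iteration, state.cache)
    (0, None)
    >>> state.cache = "previous attention output"  # the hook stores the real output tensor(s) here
    >>> state.iteration += 1
    >>> state.reset()  # called through the hook's `reset_state` before a new inference pass
    >>> (state.iteration, state.cache)
    (0, None)
    ```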
    """

    def __init__(self) -> None:
        self.iteration = 0
        self.cache = None

    def reset(self):
        self.iteration = 0
        self.cache = None

    def __repr__(self):
        cache_repr = ""
        if self.cache is None:
            cache_repr = "None"
        else:
            cache_repr = f"Tensor(shape={self.cache.shape}, dtype={self.cache.dtype})"
        return f"PyramidAttentionBroadcastState(iteration={self.iteration}, cache={cache_repr})"


class PyramidAttentionBroadcastHook(ModelHook):
    r"""A hook that applies Pyramid Attention Broadcast to a given module."""

    _is_stateful = True

    def __init__(
        self,
        timestep_skip_range: Tuple[int, int],
        block_skip_range: int,
        current_timestep_callback: Callable[[], int],
    ) -> None:
        super().__init__()

        self.timestep_skip_range = timestep_skip_range
        self.block_skip_range = block_skip_range
        self.current_timestep_callback = current_timestep_callback

    def initialize_hook(self, module):
        self.state = PyramidAttentionBroadcastState()
        return module

    def new_forward(self, module: torch.nn.Module, *args, **kwargs) -> Any:
        # Attention is only ever skipped while the current timestep lies inside the configured window.
        is_within_timestep_range = (
            self.timestep_skip_range[0] < self.current_timestep_callback() < self.timestep_skip_range[1]
        )

        # Recompute attention on the first call, outside the timestep window, or every `block_skip_range` calls;
        # otherwise broadcast (re-use) the cached attention states from the previous computation.
        should_compute_attention = (
            self.state.cache is None
            or self.state.iteration == 0
            or not is_within_timestep_range
            or self.state.iteration % self.block_skip_range == 0
        )

        if should_compute_attention:
            output = self.fn_ref.original_forward(*args, **kwargs)
        else:
            output = self.state.cache

        self.state.cache = output
        self.state.iteration += 1
        return output

    def reset_state(self, module: torch.nn.Module) -> None:
        self.state.reset()
        return module


def apply_pyramid_attention_broadcast(module: torch.nn.Module, config: PyramidAttentionBroadcastConfig):
    r"""
    Apply [Pyramid Attention Broadcast](https://huggingface.co/papers/2408.12588) to a given pipeline.

    PAB is an attention approximation method that leverages the similarity in attention states between timesteps to
    reduce the computational cost of attention computation. The key takeaway from the paper is that the attention
    similarity in the cross-attention layers between timesteps is high, followed by less similarity in the temporal
    and spatial layers. This allows for the skipping of attention computation in the cross-attention layers more
    frequently than in the temporal and spatial layers. Applying PAB will, therefore, speed up the inference process.

    Args:
        module (`torch.nn.Module`):
            The module to apply Pyramid Attention Broadcast to.
        config (`PyramidAttentionBroadcastConfig`):
            The configuration to use for Pyramid Attention Broadcast.

    Example:

    ```python
    >>> import torch
    >>> from diffusers import CogVideoXPipeline, PyramidAttentionBroadcastConfig, apply_pyramid_attention_broadcast
    >>> from diffusers.utils import export_to_video

    >>> pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)
    >>> pipe.to("cuda")

    >>> config = PyramidAttentionBroadcastConfig(
    ...     spatial_attention_block_skip_range=2,
    ...     spatial_attention_timestep_skip_range=(100, 800),
    ...     current_timestep_callback=lambda: pipe.current_timestep,
    ... )
    >>> apply_pyramid_attention_broadcast(pipe.transformer, config)
    ```
    """
    if config.current_timestep_callback is None:
        raise ValueError(
            "The `current_timestep_callback` function must be provided in the configuration to apply Pyramid "
            "Attention Broadcast."
        )

    if (
        config.spatial_attention_block_skip_range is None
        and config.temporal_attention_block_skip_range is None
        and config.cross_attention_block_skip_range is None
    ):
        logger.warning(
            "Pyramid Attention Broadcast requires one or more of `spatial_attention_block_skip_range`, "
            "`temporal_attention_block_skip_range` or `cross_attention_block_skip_range` parameters to be set to an "
            "integer, not `None`. Defaulting to using `spatial_attention_block_skip_range=2`. To avoid this warning, "
            "please set one of the above parameters."
        )
        config.spatial_attention_block_skip_range = 2

    for name, submodule in module.named_modules():
        if not isinstance(submodule, (*_ATTENTION_CLASSES, AttentionModuleMixin)):
            # PAB is only registered on Diffusers attention modules; other layers are left untouched.
            continue
        _apply_pyramid_attention_broadcast_on_attention_class(name, submodule, config)


def _apply_pyramid_attention_broadcast_on_attention_class(
    name: str, module: Attention, config: PyramidAttentionBroadcastConfig
) -> bool:
    # Classify the layer by matching its fully qualified name against the configured identifiers and by checking
    # whether it is a self- or cross-attention module.
    is_spatial_self_attention = (
        any(re.search(identifier, name) is not None for identifier in config.spatial_attention_block_identifiers)
        and config.spatial_attention_block_skip_range is not None
        and not getattr(module, "is_cross_attention", False)
    )
    is_temporal_self_attention = (
        any(re.search(identifier, name) is not None for identifier in config.temporal_attention_block_identifiers)
        and config.temporal_attention_block_skip_range is not None
        and not getattr(module, "is_cross_attention", False)
    )
    is_cross_attention = (
        any(re.search(identifier, name) is not None for identifier in config.cross_attention_block_identifiers)
        and config.cross_attention_block_skip_range is not None
        and getattr(module, "is_cross_attention", False)
    )

    block_skip_range, timestep_skip_range, block_type = None, None, None
    if is_spatial_self_attention:
        block_skip_range = config.spatial_attention_block_skip_range
        timestep_skip_range = config.spatial_attention_timestep_skip_range
        block_type = "spatial"
    elif is_temporal_self_attention:
        block_skip_range = config.temporal_attention_block_skip_range
        timestep_skip_range = config.temporal_attention_timestep_skip_range
        block_type = "temporal"
    elif is_cross_attention:
        block_skip_range = config.cross_attention_block_skip_range
        timestep_skip_range = config.cross_attention_timestep_skip_range
        block_type = "cross"

    if block_skip_range is None or timestep_skip_range is None:
        logger.info(
            f'Unable to apply Pyramid Attention Broadcast to the selected layer: "{name}" because it does not match '
            f"any of the required criteria for spatial, temporal or cross attention layers. Note, however, that this "
            f"layer may still be valid for applying PAB. Please specify the correct block identifiers in the "
            f"configuration."
        )
        return False

    logger.debug(f"Enabling Pyramid Attention Broadcast ({block_type}) in layer: {name}")
    _apply_pyramid_attention_broadcast_hook(
        module, timestep_skip_range, block_skip_range, config.current_timestep_callback
    )
    return True


def _apply_pyramid_attention_broadcast_hook(
    module: Union[Attention, MochiAttention],
    timestep_skip_range: Tuple[int, int],
    block_skip_range: int,
    current_timestep_callback: Callable[[], int],
):
    r"""
    Apply [Pyramid Attention Broadcast](https://huggingface.co/papers/2408.12588) to a given torch.nn.Module.

    Args:
        module (`torch.nn.Module`):
            The module to apply Pyramid Attention Broadcast to.
        timestep_skip_range (`Tuple[int, int]`):
            The range of timesteps to skip in the attention layer. The attention computations will be conditionally
            skipped if the current timestep is within the specified range.
        block_skip_range (`int`):
            The number of times a specific attention broadcast is skipped before computing the attention states to
            re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e., old
            attention states will be re-used) before computing the new attention states again.
        current_timestep_callback (`Callable[[], int]`):
            A callback function that returns the current inference timestep.
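
    Example (illustrative sketch; `transformer.transformer_blocks[0].attn1` is a hypothetical module path and
    `pipe.current_timestep` is assumed to exist on the host pipeline):

    ```python
    >>> _apply_pyramid_attention_broadcast_hook(
    ...     module=transformer.transformer_blocks[0].attn1,
    ...     timestep_skip_range=(100, 800),
    ...     block_skip_range=2,
    ...     current_timestep_callback=lambda: pipe.current_timestep,
    ... )
    ```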
    """
    registry = HookRegistry.check_if_exists_or_initialize(module)
    hook = PyramidAttentionBroadcastHook(timestep_skip_range, block_skip_range, current_timestep_callback)
    registry.register_hook(hook, _PYRAMID_ATTENTION_BROADCAST_HOOK)