
    bi2,              	          d dl mZ d dlmZmZ d dlZddlmZ ddlm	Z	 ddl
mZ dd	lmZ dd
lmZmZmZmZ  ee      ZdZdZe G d d             Z G d de      Z G d de      Z G d de      Zdej6                  j8                  deddfdZdej6                  j8                  dededdfdZddej6                  j8                  dede ddfdZ!y)     )	dataclass)TupleUnionN   )
get_logger)unwrap_module   )"_ALL_TRANSFORMER_BLOCK_IDENTIFIERS)TransformerBlockRegistry)	BaseStateHookRegistry	ModelHookStateManagerfbc_leader_block_hookfbc_block_hookc                        e Zd ZU dZdZeed<   y)FirstBlockCacheConfiga_  
    Configuration for [First Block
    Cache](https://github.com/chengzeyi/ParaAttention/blob/7a266123671b55e7e5a2fe9af3121f07a36afc78/README.md#first-block-cache-our-dynamic-caching).

    Args:
        threshold (`float`, defaults to `0.05`):
            The threshold to determine whether or not a forward pass through all layers of the model is required. A
            higher threshold usually results in a forward pass through a lower number of layers and faster inference,
            but might lead to poorer generation quality. A lower threshold may not result in significant generation
            speedup. The threshold is compared against the absmean difference of the residuals between the current and
            cached outputs from the first transformer block. If the difference is below the threshold, the forward pass
            is skipped.
    g?	thresholdN)__name__
__module____qualname____doc__r   float__annotations__     \/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/hooks/first_block_cache.pyr   r   !   s     Iur   r   c                   &     e Zd Zd fdZd Z xZS )FBCSharedBlockStatec                 Z    t         |           d | _        d | _        d | _        d| _        y NT)super__init__head_block_outputhead_block_residualtail_block_residualsshould_compute)self	__class__s    r   r#   zFBCSharedBlockState.__init__5   s.    PT15 SW!$(r   c                      d | _         d| _        y r!   )r&   r'   )r(   s    r   resetzFBCSharedBlockState.reset=   s    $(!"r   )returnN)r   r   r   r#   r+   __classcell__r)   s   @r   r   r   4   s    )#r   r   c                       e Zd ZdZdedefdZd Zdej                  j                  fdZd Zej                  j                  d	ej                  d
efd       Zy)FBCHeadBlockHookTstate_managerr   c                 .    || _         || _        d | _        y N)r1   r   	_metadata)r(   r1   r   s      r   r#   zFBCHeadBlockHook.__init__E   s    *"r   c                 d    t        |      }t        j                  |j                        | _        |S r3   r   r   getr)   r4   r(   moduleunwrapped_modules      r   initialize_hookz FBCHeadBlockHook.initialize_hookJ   +    (01556F6P6PQr   r9   c                    | j                   j                  d||      } | j                  j                  |i |}t	        |t
              }|r|| j                   j                     |z
  }n||z
  }| j                  j                         }d x}	}
| j                  |      }||_
        |s|r*|j                  d   || j                   j                     z   }	n|j                  d   |z   }	| j                   j                  -|sJ |j                  d   || j                   j                     z   }
|rPd gt        |      z  }|	|| j                   j                  <   |
|| j                   j                  <   t        |      }|}|S |	}|}|S |rHd gt        |      z  }|| j                   j                     |d<   || j                   j                     |d<   n|}||_        ||_        |S )Nhidden_statesr   r	   )r4   _get_parameter_from_args_kwargsfn_reforiginal_forward
isinstancetuplereturn_hidden_states_indexr1   	get_state _should_compute_remaining_blocksr'   r&   "return_encoder_hidden_states_indexlenr$   r%   )r(   r9   argskwargsoriginal_hidden_statesoutputis_output_tuplehidden_states_residualshared_stater>   encoder_hidden_statesr'   return_outputr$   s                 r   new_forwardzFBCHeadBlockHook.new_forwardO   s   !%!O!OP_aegm!n---t>v>$VU3%+DNN,U,U%VYo%o"%+.D%D",0,>,>,H,H,J044->>?UV&4# 55a86$..BkBk;ll  !- A A! Dv M~~@@L&& 55a86$..BsBs;tt & !%V 4KXdnnGGHShdnnOOP %m 4 #F  !."F  %)FS[$8!'-dnn.W.W'X!!$'-dnn._._'`!!$$*!->L*/EL,r   c                 :    | j                   j                          |S r3   )r1   r+   )r(   r9   s     r   reset_statezFBCHeadBlockHook.reset_state   s      "r   rN   r,   c                 *   | j                   j                         }|j                  y|j                  }||z
  j                         j	                         }|j                         j	                         }||z  j                         }|| j                  kD  S r!   )r1   rE   r%   absmeanitemr   )r(   rN   rO   prev_hidden_states_residualabsmeanprev_hidden_states_absmeandiffs          r   rF   z1FBCHeadBlockHook._should_compute_remaining_blocks   s    ))335++3&2&F&F#),GGLLNSSU%@%D%D%F%K%K%M"44::<dnn$$r   N)r   r   r   _is_statefulr   r   r#   r;   torchnnModulerR   rT   compilerdisableTensorboolrF   r   r   r   r0   r0   B   sj    Ll u 

1%((// 1f ^^%u|| %X\ % %r   r0   c                   d     e Zd Zddedef fdZd Zdej                  j                  fdZ
 xZS )FBCBlockHookr1   is_tailc                 L    t         |           || _        || _        d | _        y r3   )r"   r#   r1   rg   r4   )r(   r1   rg   r)   s      r   r#   zFBCBlockHook.__init__   s$    *r   c                 d    t        |      }t        j                  |j                        | _        |S r3   r6   r8   s      r   r;   zFBCBlockHook.initialize_hook   r<   r   r9   c                    | j                   j                  d||      }d }| j                   j                  | j                   j                  d||      }| j                  j	                         }|j
                  r | j                  j                  |i |}| j                  rd x}}	t        |t              rS|| j                   j                     |j                  d   z
  }|| j                   j                     |j                  d   z
  }	n||j                  z
  }||	f|_        |S ||}
|
S d d g}
||
| j                   j                  <   ||
| j                   j                  <   t        |
      }
|
S )Nr>   rP   r   r	   )r4   r?   rG   r1   rE   r'   r@   rA   rg   rB   rC   rD   r$   r&   )r(   r9   rI   rJ   rK   original_encoder_hidden_statesrO   rL   rN   encoder_hidden_states_residualrQ   s              r   rR   zFBCBlockHook.new_forward   sv   !%!O!OP_aegm!n)-&>><<H-1^^-[-['v.* ))335&&1T[[114B6BF||JNN&)Gfe,t~~HHILLjLjklLmm + t~~PPQT`TrTrstTuu 3 .4l6T6T-T*5KMk4l1M)12M 	 "4LMG]M$..CCDOmM$..KKL!-0Mr   F)r   r   r   r   rd   r#   r;   r^   r_   r`   rR   r-   r.   s   @r   rf   rf      s/    l T 
!%((// !r   rf   r9   configr,   c                 f   t        t        di       }g }| j                         D ]]  \  }}|t        vs$t	        |t
        j                  j                        s3t        |      D ]  \  }}|j                  | d| |f        _ |j                  d      \  }}	|j                  d      \  }
}t        j                  d| d       t        |	||j                         |D ]*  \  }}t        j                  d| d       t        ||       , t        j                  d|
 d       t        ||d	
       y)a  
    Applies [First Block
    Cache](https://github.com/chengzeyi/ParaAttention/blob/4de137c5b96416489f06e43e19f2c14a772e28fd/README.md#first-block-cache-our-dynamic-caching)
    to a given module.

    First Block Cache builds on the ideas of [TeaCache](https://huggingface.co/papers/2411.19108). It is much simpler
    to implement generically for a wide range of models and has been integrated first for experimental purposes.

    Args:
        module (`torch.nn.Module`):
            The pytorch module to apply FBCache to. Typically, this should be a transformer architecture supported in
            Diffusers, such as `CogVideoXTransformer3DModel`, but external implementations may also work.
        config (`FirstBlockCacheConfig`):
            The configuration to use for applying the FBCache method.

    Example:
        ```python
        >>> import torch
        >>> from diffusers import CogView4Pipeline
        >>> from diffusers.hooks import apply_first_block_cache, FirstBlockCacheConfig

        >>> pipe = CogView4Pipeline.from_pretrained("THUDM/CogView4-6B", torch_dtype=torch.bfloat16)
        >>> pipe.to("cuda")

        >>> apply_first_block_cache(pipe.transformer, FirstBlockCacheConfig(threshold=0.2))

        >>> prompt = "A photo of an astronaut riding a horse on mars"
        >>> image = pipe(prompt, generator=torch.Generator().manual_seed(42)).images[0]
        >>> image.save("output.png")
        ```
    r   .r   zApplying FBCHeadBlockHook to ''zApplying FBCBlockHook to 'z%Applying FBCBlockHook to tail block 'T)rg   N)r   r   named_childrenr
   rB   r^   r_   
ModuleList	enumerateappendpoploggerdebug_apply_fbc_head_block_hookr   _apply_fbc_block_hook)r9   rn   r1   remaining_blocksname	submoduleindexblockhead_block_name
head_blocktail_block_name
tail_blocks               r   apply_first_block_cacher      s?   B !!4b"=M!002 @i99IW\W_W_WjWjAk%i0 	@LE5##vQug%6$>?	@@ #3"6"6q"9OZ"2"6"6r":OZ
LL1/1B!DEz=&:J:JK' 4e1$q9:e]34 LL88IKL*mTBr   r   r1   r   c                 r    t        j                  |       }t        ||      }|j                  |t               y r3   )r   check_if_exists_or_initializer0   register_hook_FBC_LEADER_BLOCK_HOOK)r   r1   r   registryhooks        r   rz   rz      s/    99%@HM95D4!78r   rg   c                 r    t        j                  |       }t        ||      }|j                  |t               y r3   )r   r   rf   r   _FBC_BLOCK_HOOK)r   r1   rg   r   r   s        r   r{   r{      s.    99%@Hw/D41r   rm   )"dataclassesr   typingr   r   r^   utilsr   utils.torch_utilsr   _commonr
   _helpersr   hooksr   r   r   r   r   rx   r   r   r   r   r0   rf   r_   r`   r   r   rz   rd   r{   r   r   r   <module>r      s    "    - 7 . C C 
H	0 "   $#) #M%y M%`-9 -`5CEHHOO 5C=R 5CW[ 5Cp9ehhoo 9l 9_d 9im 92 2 2X\ 2im 2r   