
import math
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

import torch

from ..configuration_utils import register_to_config
from ..hooks import HookRegistry, LayerSkipConfig
from ..hooks.layer_skip import _apply_layer_skip_hook
from ..utils import get_logger
from .guider_utils import BaseGuidance, rescale_noise_cfg


if TYPE_CHECKING:
    from ..modular_pipelines.modular_pipeline import BlockState


logger = get_logger(__name__)


class PerturbedAttentionGuidance(BaseGuidance):
    r"""
    Perturbed Attention Guidance (PAG): https://huggingface.co/papers/2403.17377

    The intuition behind PAG can be thought of as moving the CFG predicted distribution estimates further away from
    worse versions of the conditional distribution estimates. PAG was one of the first techniques to introduce the idea
    of using a worse version of the trained model for better guiding itself in the denoising process. It perturbs the
    attention scores of the latent stream by replacing the score matrix with an identity matrix for selectively chosen
    layers.
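
    Concretely, writing the conditional, unconditional, and attention-perturbed conditional predictions as
    `pred_cond`, `pred_uncond`, and `pred_cond_skip`, the diffusers-native formulation implemented in `forward` below
    combines them (when both classifier-free and perturbed guidance are active at the current step) as:

        pred = pred_uncond + guidance_scale * (pred_cond - pred_uncond)
                           + perturbed_guidance_scale * (pred_cond - pred_cond_skip)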

    Additional reading:
    - [Guiding a Diffusion Model with a Bad Version of Itself](https://huggingface.co/papers/2406.02507)

    PAG shares most of its implementation with SkipLayerGuidance because the two techniques overlap in their
    configuration parameters and implementation details.
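
    When only `perturbed_guidance_layers` is provided, the guider internally builds a configuration roughly equivalent
    to the following, so that only the attention score matrix of the selected layers is perturbed:

        LayerSkipConfig(
            indices=perturbed_guidance_layers,
            fqn="auto",
            skip_attention=False,
            skip_attention_scores=True,
            skip_ff=False,
        )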

    Args:
        guidance_scale (`float`, defaults to `7.5`):
            The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text
            prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and
            deterioration of image quality.
        perturbed_guidance_scale (`float`, defaults to `2.8`):
            The scale parameter for perturbed attention guidance.
        perturbed_guidance_start (`float`, defaults to `0.01`):
            The fraction of the total number of denoising steps after which perturbed attention guidance starts.
        perturbed_guidance_stop (`float`, defaults to `0.2`):
            The fraction of the total number of denoising steps after which perturbed attention guidance stops.
        perturbed_guidance_layers (`int` or `List[int]`, *optional*):
            The layer indices to apply perturbed attention guidance to. Can be a single integer or a list of integers.
            If not provided, `perturbed_guidance_config` must be provided.
        perturbed_guidance_config (`LayerSkipConfig` or `List[LayerSkipConfig]`, *optional*):
            The configuration for the perturbed attention guidance. Can be a single `LayerSkipConfig` or a list of
            `LayerSkipConfig`. If not provided, `perturbed_guidance_layers` must be provided.
        guidance_rescale (`float`, defaults to `0.0`):
            The rescale factor applied to the noise predictions. This is used to improve image quality and fix
            overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
            Flawed](https://huggingface.co/papers/2305.08891).
        use_original_formulation (`bool`, defaults to `False`):
            Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default,
            we use the diffusers-native implementation that has been in the codebase for a long time. See
            [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details.
        start (`float`, defaults to `0.01`):
            The fraction of the total number of denoising steps after which guidance starts.
        stop (`float`, defaults to `0.2`):
            The fraction of the total number of denoising steps after which guidance stops.
    """

    _input_predictions = ["pred_cond", "pred_uncond", "pred_cond_skip"]

    @register_to_config
    def __init__(
        self,
        guidance_scale: float = 7.5,
        perturbed_guidance_scale: float = 2.8,
        perturbed_guidance_start: float = 0.01,
        perturbed_guidance_stop: float = 0.2,
        perturbed_guidance_layers: Optional[Union[int, List[int]]] = None,
        perturbed_guidance_config: Optional[Union[LayerSkipConfig, List[LayerSkipConfig], Dict[str, Any]]] = None,
        guidance_rescale: float = 0.0,
        use_original_formulation: bool = False,
        start: float = 0.0,
        stop: float = 1.0,
    ):
        super().__init__(start, stop)

        self.guidance_scale = guidance_scale
        self.skip_layer_guidance_scale = perturbed_guidance_scale
        self.skip_layer_guidance_start = perturbed_guidance_start
        self.skip_layer_guidance_stop = perturbed_guidance_stop
        self.guidance_rescale = guidance_rescale
        self.use_original_formulation = use_original_formulation

        if perturbed_guidance_config is None:
            if perturbed_guidance_layers is None:
                raise ValueError(
                    "`perturbed_guidance_layers` must be provided if `perturbed_guidance_config` is not specified."
                )
            perturbed_guidance_config = LayerSkipConfig(
                indices=perturbed_guidance_layers,
                fqn="auto",
                skip_attention=False,
                skip_attention_scores=True,
                skip_ff=False,
            )
        else:
            if perturbed_guidance_layers is not None:
                raise ValueError(
                    "`perturbed_guidance_layers` should not be provided if `perturbed_guidance_config` is specified."
                )

        # Normalize the configuration to a list of `LayerSkipConfig` objects.
        if isinstance(perturbed_guidance_config, dict):
            perturbed_guidance_config = LayerSkipConfig.from_dict(perturbed_guidance_config)

        if isinstance(perturbed_guidance_config, LayerSkipConfig):
            perturbed_guidance_config = [perturbed_guidance_config]

        if not isinstance(perturbed_guidance_config, list):
            raise ValueError(
                "`perturbed_guidance_config` must be a `LayerSkipConfig`, a list of `LayerSkipConfig`, or a dict that "
                "can be converted to a `LayerSkipConfig`."
            )
        elif isinstance(next(iter(perturbed_guidance_config), None), dict):
            perturbed_guidance_config = [LayerSkipConfig.from_dict(config) for config in perturbed_guidance_config]

        for config in perturbed_guidance_config:
            if config.skip_attention or not config.skip_attention_scores or config.skip_ff:
                logger.warning(
                    "Perturbed Attention Guidance is designed to perturb attention scores, so `skip_attention` should "
                    "be False, `skip_attention_scores` should be True, and `skip_ff` should be False. Please check "
                    "your configuration. Modifying the config to match the expected values."
                )
                config.skip_attention = False
                config.skip_attention_scores = True
                config.skip_ff = False

        self.skip_layer_config = perturbed_guidance_config
        self._skip_layer_hook_names = [f"SkipLayerGuidance_{i}" for i in range(len(self.skip_layer_config))]

    def prepare_models(self, denoiser: torch.nn.Module) -> None:
        self._count_prepared += 1
        if self._is_slg_enabled() and self.is_conditional and self._count_prepared > 1:
            for name, config in zip(self._skip_layer_hook_names, self.skip_layer_config):
                _apply_layer_skip_hook(denoiser, config, name=name)

    def cleanup_models(self, denoiser: torch.nn.Module) -> None:
        if self._is_slg_enabled() and self.is_conditional and self._count_prepared > 1:
            registry = HookRegistry.check_if_exists_or_initialize(denoiser)
            # Remove the layer-skip hooks after the perturbed forward pass.
            for hook_name in self._skip_layer_hook_names:
                registry.remove_hook(hook_name, recurse=True)

    def prepare_inputs(
        self, data: "BlockState", input_fields: Optional[Dict[str, Union[str, Tuple[str, str]]]] = None
    ) -> List["BlockState"]:
        if input_fields is None:
            input_fields = self._input_fields

        if self.num_conditions == 1:
            tuple_indices = [0]
            input_predictions = ["pred_cond"]
        elif self.num_conditions == 2:
            tuple_indices = [0, 1]
            input_predictions = (
                ["pred_cond", "pred_uncond"] if self._is_cfg_enabled() else ["pred_cond", "pred_cond_skip"]
            )
        else:
            tuple_indices = [0, 1, 0]
            input_predictions = ["pred_cond", "pred_uncond", "pred_cond_skip"]
        data_batches = []
        for i in range(self.num_conditions):
            data_batch = self._prepare_batch(input_fields, data, tuple_indices[i], input_predictions[i])
            data_batches.append(data_batch)
        return data_batches

    def forward(
        self,
        pred_cond: torch.Tensor,
        pred_uncond: Optional[torch.Tensor] = None,
        pred_cond_skip: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        pred = None

        if not self._is_cfg_enabled() and not self._is_slg_enabled():
            # Neither guidance is active: use the conditional prediction as-is.
            pred = pred_cond
        elif not self._is_cfg_enabled():
            # Only perturbed attention guidance is active.
            shift = pred_cond - pred_cond_skip
            pred = pred_cond if self.use_original_formulation else pred_cond_skip
            pred = pred + self.skip_layer_guidance_scale * shift
        elif not self._is_slg_enabled():
            # Only classifier-free guidance is active.
            shift = pred_cond - pred_uncond
            pred = pred_cond if self.use_original_formulation else pred_uncond
            pred = pred + self.guidance_scale * shift
        else:
            # Both guidances are active: combine the CFG and PAG shifts.
            shift = pred_cond - pred_uncond
            shift_skip = pred_cond - pred_cond_skip
            pred = pred_cond if self.use_original_formulation else pred_uncond
            pred = pred + self.guidance_scale * shift + self.skip_layer_guidance_scale * shift_skip

        if self.guidance_rescale > 0.0:
            pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)

        return pred, {}

    @property
    def is_conditional(self) -> bool:
        return self._count_prepared == 1 or self._count_prepared == 3

    @property
    def num_conditions(self) -> int:
        num_conditions = 1
        if self._is_cfg_enabled():
            num_conditions += 1
        if self._is_slg_enabled():
            num_conditions += 1
        return num_conditions

    def _is_cfg_enabled(self) -> bool:
        if not self._enabled:
            return False

        is_within_range = True
        if self._num_inference_steps is not None:
            skip_start_step = int(self._start * self._num_inference_steps)
            skip_stop_step = int(self._stop * self._num_inference_steps)
            is_within_range = skip_start_step <= self._step < skip_stop_step

        is_close = False
        if self.use_original_formulation:
            is_close = math.isclose(self.guidance_scale, 0.0)
        else:
            is_close = math.isclose(self.guidance_scale, 1.0)

        return is_within_range and not is_close

    def _is_slg_enabled(self) -> bool:
        if not self._enabled:
            return False

        is_within_range = True
        if self._num_inference_steps is not None:
            skip_start_step = int(self.skip_layer_guidance_start * self._num_inference_steps)
            skip_stop_step = int(self.skip_layer_guidance_stop * self._num_inference_steps)
            is_within_range = skip_start_step < self._step < skip_stop_step

        is_zero = math.isclose(self.skip_layer_guidance_scale, 0.0)

        return is_within_range and not is_zero
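

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not executed on import). It shows how this
# guider is typically constructed before being handed to a modular pipeline.
# The guidance scales and layer indices below are arbitrary example values,
# and the import path assumes the class is re-exported from
# `diffusers.guiders`; otherwise import it from this module directly.
#
#     from diffusers.guiders import PerturbedAttentionGuidance
#
#     guider = PerturbedAttentionGuidance(
#         guidance_scale=5.0,                # classifier-free guidance strength
#         perturbed_guidance_scale=2.8,      # PAG strength
#         perturbed_guidance_layers=[8, 9],  # blocks whose attention scores are perturbed
#     )
#
# The pipeline then drives `prepare_models`, `prepare_inputs`, `forward`, and
# `cleanup_models` during each denoising step.
# ---------------------------------------------------------------------------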