
    bi.A                         d dl mZmZmZmZmZ d dlZd dlmZm	Z	 ddl
mZ ddlmZmZ ddlmZmZ d	d
lmZ d	dlmZ d	dlmZ d	dlmZ d	dlmZ dZ G d dee      Zy)    )CallableDictListOptionalUnionN)CLIPTextModelCLIPTokenizer   )DDPMWuerstchenScheduler)	deprecatereplace_example_docstring   )DeprecatedPipelineMixinDiffusionPipeline   )PaellaVQModel)WuerstchenDiffNeXt)WuerstchenPrior)WuerstchenDecoderPipeline)WuerstchenPriorPipelineax  
    Examples:
        ```py
        >>> from diffusions import WuerstchenCombinedPipeline

        >>> pipe = WuerstchenCombinedPipeline.from_pretrained("warp-ai/Wuerstchen", torch_dtype=torch.float16).to(
        ...     "cuda"
        ... )
        >>> prompt = "an image of a shiba inu, donning a spacesuit and helmet"
        >>> images = pipe(prompt=prompt)
        ```
c            /           e Zd ZdZdZdZdededede	de
d	ed
edede	f fdZd4dee   fdZd5dee   deej&                  ef   fdZd5dee   deej&                  ef   fdZd5dZd Z ej2                          ee      dddddddddddddddddddgddgfdeeeee   f      d ed!ed"ed#eee      d$ed%ed&eee      d'ed(eeeee   f      d)eej<                     d*eej<                     d+ed,eeej>                  eej>                     f      deej<                     d-ee   d.e d/eeeee!gdf      d0ee   d1eeeee!gdf      d2ee   f*d3              Z" xZ#S )6WuerstchenCombinedPipelinea  
    Combined Pipeline for text-to-image generation using Wuerstchen

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

    Args:
        tokenizer (`CLIPTokenizer`):
            The decoder tokenizer to be used for text inputs.
        text_encoder (`CLIPTextModel`):
            The decoder text encoder to be used for text inputs.
        decoder (`WuerstchenDiffNeXt`):
            The decoder model to be used for decoder image generation pipeline.
        scheduler (`DDPMWuerstchenScheduler`):
            The scheduler to be used for decoder image generation pipeline.
        vqgan (`PaellaVQModel`):
            The VQGAN model to be used for decoder image generation pipeline.
        prior_tokenizer (`CLIPTokenizer`):
            The prior tokenizer to be used for text inputs.
        prior_text_encoder (`CLIPTextModel`):
            The prior text encoder to be used for text inputs.
        prior_prior (`WuerstchenPrior`):
            The prior model to be used for prior pipeline.
        prior_scheduler (`DDPMWuerstchenScheduler`):
            The scheduler to be used for prior pipeline.
    z0.33.1T	tokenizertext_encoderdecoder	schedulervqganprior_tokenizerprior_text_encoderprior_priorprior_schedulerc
                     t         
|           | j                  |||||||||		       t        ||||	      | _        t        |||||      | _        y )N)	r   r   r   r   r   r    r   r   r!   )priorr   r   r   )r   r   r   r   r   )super__init__register_modulesr   
prior_piper   decoder_pipe)selfr   r   r   r   r   r   r   r    r!   	__class__s             v/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.pyr%   z#WuerstchenCombinedPipeline.__init__J   sv     	%#1++ 	 
	
 2+%%	
 6%
    Nattention_opc                 :    | j                   j                  |       y N)r(   *enable_xformers_memory_efficient_attention)r)   r-   s     r+   r0   zEWuerstchenCombinedPipeline.enable_xformers_memory_efficient_attentionq   s    DD\Rr,   gpu_iddevicec                 x    | j                   j                  ||       | j                  j                  ||       y)a  
        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
        r1   r2   N)r'   enable_model_cpu_offloadr(   r)   r1   r2   s      r+   r5   z3WuerstchenCombinedPipeline.enable_model_cpu_offloadt   s4     	00v0N22&2Pr,   c                 x    | j                   j                  ||       | j                  j                  ||       y)u  
        Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
        Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
        GPU only when their specific submodule's `forward` method is called. Offloading happens on a submodule basis.
        Memory savings are higher than using `enable_model_cpu_offload`, but performance is lower.
        r4   N)r'   enable_sequential_cpu_offloadr(   r6   s      r+   r8   z8WuerstchenCombinedPipeline.enable_sequential_cpu_offload~   s4     	55VF5S77vf7Ur,   c                 x    | j                   j                  ||       | j                  j                  ||       y )N)iterabletotal)r'   progress_barr(   )r)   r:   r;   s      r+   r<   z'WuerstchenCombinedPipeline.progress_bar   s2    $$he$D&&&Fr,   c                 t     | j                   j                  di |  | j                  j                  di | y )N )r'   set_progress_bar_configr(   )r)   kwargss     r+   r?   z2WuerstchenCombinedPipeline.set_progress_bar_config   s2    ///9&9111;F;r,   i   <   g      @   g        r   pillatentspromptheightwidthprior_num_inference_stepsprior_timestepsprior_guidance_scalenum_inference_stepsdecoder_timestepsdecoder_guidance_scalenegative_promptprompt_embedsnegative_prompt_embedsnum_images_per_prompt	generatoroutput_typereturn_dictprior_callback_on_step_end(prior_callback_on_step_end_tensor_inputscallback_on_step_end"callback_on_step_end_tensor_inputsc                    i }|j                  dd      !|j                  d      |d<   t        ddd       |j                  dd      !t        ddd       |j                  d      |d<    | j                  di d	||ndd
|d|d|d|d|d||
ndd|d|d|d|d|ddddd|d||}|d   } | j                  d|||nd|||	|
|||||d|}|S )a%  
        Function invoked when calling the pipeline for generation.

        Args:
            prompt (`str` or `List[str]`):
                The prompt or prompts to guide the image generation for the prior and decoder.
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
                if `guidance_scale` is less than `1`).
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings for the prior. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings for the prior. Can be used to easily tweak text inputs, *e.g.*
                prompt weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt`
                input argument.
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                The number of images to generate per prompt.
            height (`int`, *optional*, defaults to 512):
                The height in pixels of the generated image.
            width (`int`, *optional*, defaults to 512):
                The width in pixels of the generated image.
            prior_guidance_scale (`float`, *optional*, defaults to 4.0):
                Guidance scale as defined in [Classifier-Free Diffusion
                Guidance](https://huggingface.co/papers/2207.12598). `prior_guidance_scale` is defined as `w` of
                equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
                setting `prior_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
                closely linked to the text `prompt`, usually at the expense of lower image quality.
            prior_num_inference_steps (`Union[int, Dict[float, int]]`, *optional*, defaults to 60):
                The number of prior denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference. For more specific timestep spacing, you can pass customized
                `prior_timesteps`
            num_inference_steps (`int`, *optional*, defaults to 12):
                The number of decoder denoising steps. More denoising steps usually lead to a higher quality image at
                the expense of slower inference. For more specific timestep spacing, you can pass customized
                `timesteps`
            prior_timesteps (`List[float]`, *optional*):
                Custom timesteps to use for the denoising process for the prior. If not defined, equal spaced
                `prior_num_inference_steps` timesteps are used. Must be in descending order.
            decoder_timesteps (`List[float]`, *optional*):
                Custom timesteps to use for the denoising process for the decoder. If not defined, equal spaced
                `num_inference_steps` timesteps are used. Must be in descending order.
            decoder_guidance_scale (`float`, *optional*, defaults to 0.0):
                Guidance scale as defined in [Classifier-Free Diffusion
                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
                the text `prompt`, usually at the expense of lower image quality.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor will ge generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between: `"pil"` (`PIL.Image.Image`), `"np"`
                (`np.array`) or `"pt"` (`torch.Tensor`).
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
            prior_callback_on_step_end (`Callable`, *optional*):
                A function that calls at the end of each denoising steps during the inference. The function is called
                with the following arguments: `prior_callback_on_step_end(self: DiffusionPipeline, step: int, timestep:
                int, callback_kwargs: Dict)`.
            prior_callback_on_step_end_tensor_inputs (`List`, *optional*):
                The list of tensor inputs for the `prior_callback_on_step_end` function. The tensors specified in the
                list will be passed as `callback_kwargs` argument. You will only be able to include variables listed in
                the `._callback_tensor_inputs` attribute of your pipeline class.
            callback_on_step_end (`Callable`, *optional*):
                A function that calls at the end of each denoising steps during the inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
                `callback_on_step_end_tensor_inputs`.
            callback_on_step_end_tensor_inputs (`List`, *optional*):
                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                `._callback_tensor_inputs` attribute of your pipeline class.

        Examples:

        Returns:
            [`~pipelines.ImagePipelineOutput`] or `tuple` [`~pipelines.ImagePipelineOutput`] if `return_dict` is True,
            otherwise a `tuple`. When returning a tuple, the first element is a list with the generated images.
        prior_callbackNcallbackz1.0.0ztPassing `prior_callback` as an input argument to `__call__` is deprecated, consider use `prior_callback_on_step_end`prior_callback_stepszzPassing `prior_callback_steps` as an input argument to `__call__` is deprecated, consider use `prior_callback_on_step_end`callback_stepsrE   rF   rG   rK   	timestepsguidance_scalerN   rO   rP   rQ   rR   rD   rS   ptrT   FrW   rX   r    )image_embeddingsrE   rK   r^   r_   rN   rR   rS   rT   rW   rX   r>   )getpopr   r'   r(   )r)   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rD   rS   rT   rU   rV   rW   rX   r@   prior_kwargsprior_outputsrb   outputss                              r+   __call__z#WuerstchenCombinedPipeline.__call__   s   ^ ::&-9'-zz2B'CL$  G
 ::,d3?& M
 .4ZZ8N-OL)*' 
*26

 
 !:	

 &
 0
 0F/MOSW
 (
 $:
 #8
  
 
 
 
 "<
  0X#
& )+#$## 
-#/6R 3'1+##!5/Q
 
 r,   r/   )NN)$__name__
__module____qualname____doc___last_supported_version_load_connected_pipesr	   r   r   r   r   r   r%   r   r   r0   intr   torchr2   strr5   r8   r<   r?   no_gradr   TEXT2IMAGE_EXAMPLE_DOC_STRINGr   floatTensor	Generatorboolr   rh   __classcell__)r*   s   @r+   r   r   +   s   6 ' %
 %
 $%
 $	%

 +%
 %
 '%
 *%
 %%
 1%
NSxPXGY SQx} QUSXS_S_adSdMe QVHSM VRWX]XdXdfiXiRj VG< U]]_<= 37)+15&)#%37(+;?049=%&MQ*.%* QU?HkKO9B-asDI~./a a 	a
 $'a "$u+.a $a !a $DK0a !&a "%T#Y"78a  -a !) 6a  #a E%//43H"HIJa  %,,'!a" c]#a$ %a& %-XsC6F6L-M$N'a( 37s))a* 'xc40@$0F'GH+a, -1I-a > ar,   r   )typingr   r   r   r   r   rp   transformersr   r	   
schedulersr   utilsr   r   pipeline_utilsr   r   modeling_paella_vq_modelr   modeling_wuerstchen_diffnextr   modeling_wuerstchen_priorr   pipeline_wuerstchenr   pipeline_wuerstchen_priorr   rs   r   r>   r,   r+   <module>r      sE    9 8  5 1 9 G 3 < 6 : >! H!8:K Hr,   