
    bi(                         d dl mZmZmZmZmZ d dlZddlmZm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZmZ  e       rd dlmc mZ d
ZndZ G d de      Zy)    )DictListOptionalTupleUnionN   )AutoencoderKLDiTTransformer2DModel)KarrasDiffusionSchedulers)is_torch_xla_available)randn_tensor   )DiffusionPipelineImagePipelineOutputTFc                       e Zd ZdZdZ	 ddedededee	e
ef      f fdZdeeee   f   d	ee
   fd
Z ej"                         	 	 	 	 	 ddee
   dedeeej&                  eej&                     f      de
dee   ded	eeef   fd       Z xZS )DiTPipelinea  
    Pipeline for image generation based on a Transformer backbone instead of a UNet.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
    implemented for all pipelines (downloading, saving, running on a particular device, etc.).

    Parameters:
        transformer ([`DiTTransformer2DModel`]):
            A class conditioned `DiTTransformer2DModel` to denoise the encoded image latents. Initially published as
            [`Transformer2DModel`](https://huggingface.co/facebook/DiT-XL-2-256/blob/main/transformer/config.json#L2)
            in the config, but the mismatch can be ignored.
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
        scheduler ([`DDIMScheduler`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    ztransformer->vaetransformervae	schedulerid2labelc                    t         |           | j                  |||       i | _        ||j	                         D ]O  \  }}|j                  d      D ]6  }t        |      | j                  |j                         j                         <   8 Q t        t        | j                  j	                                     | _        y y )N)r   r   r   ,)super__init__register_moduleslabelsitemssplitintlstriprstripdictsorted)	selfr   r   r   r   keyvaluelabel	__class__s	           _/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/pipelines/dit/pipeline_dit.pyr   zDiTPipeline.__init__<   s     	+3)T &nn. D
U"[[- DE;>s8DKK 5 5 78DD vdkk&7&7&9:;DK	      r'   returnc                     t        |t              st        |      }|D ]+  }|| j                  vst        | d| j                   d       |D cg c]  }| j                  |    c}S c c}w )a0  

        Map label strings from ImageNet to corresponding class ids.

        Parameters:
            label (`str` or `dict` of `str`):
                Label strings to be mapped to class ids.

        Returns:
            `list` of `int`:
                Class ids to be processed by pipeline.
        zK does not exist. Please make sure to select one of the following labels: 
 .)
isinstancelistr   
ValueError)r$   r'   ls      r)   get_label_idszDiTPipeline.get_label_idsN   sw     %&KE 	A# cefjfqfqerrst 	 )..1A...s   A)class_labelsguidance_scale	generatornum_inference_stepsoutput_typereturn_dictc                 	   t        |      }| j                  j                  j                  }| j                  j                  j                  }	t        ||	||f|| j                  | j                  j                        }
|dkD  rt        j                  |
gdz        n|
}t        j                  || j                        j                  d      }t        j                  dg|z  | j                        }|dkD  rt        j                  ||gd      n|}| j                  j                  |       | j                  | j                  j                        D ]  }|dkD  r*|dt        |      dz   }t        j                  ||gd	      }| j                  j!                  ||      }|}t        j"                  |      s|j$                  j&                  d
k(  }|j$                  j&                  dk(  }t)        |t*              r%|s|rt        j,                  nt        j.                  }n$|s|rt        j0                  nt        j2                  }t        j                  |g||j$                        }n6t        |j4                        dk(  r|d   j7                  |j$                        }|j9                  |j4                  d         }| j                  |||      j:                  }|dkD  rz|ddd|	f   |dd|	df   }}t        j<                  |t        |      dz  d	      \  }}||||z
  z  z   }t        j                  ||gd	      }t        j                  ||gd	      }| j                  j                  j>                  dz  |	k(  rt        j<                  ||	d	      \  }}n|}| j                  jA                  |||      jB                  }tD        sytG        jH                           |dkD  r|jK                  dd	      \  }
}n|}
d| jL                  j                  jN                  z  |
z  }
| jL                  jQ                  |
      j:                  }|dz  dz   jS                  dd      }|jU                         jW                  dddd      j+                         jY                         }|dk(  r| j[                  |      }| j]                          |s|fS t_        |      S )a>	  
        The call function to the pipeline for generation.

        Args:
            class_labels (List[int]):
                List of ImageNet class labels for the images to be generated.
            guidance_scale (`float`, *optional*, defaults to 4.0):
                A higher guidance scale value encourages the model to generate images closely linked to the text
                `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
            generator (`torch.Generator`, *optional*):
                A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                generation deterministic.
            num_inference_steps (`int`, *optional*, defaults to 250):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generated image. Choose between `PIL.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`ImagePipelineOutput`] instead of a plain tuple.

        Examples:

        ```py
        >>> from diffusers import DiTPipeline, DPMSolverMultistepScheduler
        >>> import torch

        >>> pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-256", torch_dtype=torch.float16)
        >>> pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
        >>> pipe = pipe.to("cuda")

        >>> # pick words from Imagenet class labels
        >>> pipe.labels  # to print all available words

        >>> # pick words that exist in ImageNet
        >>> words = ["white shark", "umbrella"]

        >>> class_ids = pipe.get_label_ids(words)

        >>> generator = torch.manual_seed(33)
        >>> output = pipe(class_labels=class_ids, num_inference_steps=25, generator=generator)

        >>> image = output.images[0]  # label 'white shark'
        ```

        Returns:
            [`~pipelines.ImagePipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
                returned where the first element is a list with the generated images
        )shaper5   devicedtype   r   )r;   i  r   N)dimmpsnpu)r<   r;   )timestepr3   g      ?r   pil)images)0lenr   configsample_sizein_channelsr   _execution_devicer<   torchcattensorreshaper   set_timestepsprogress_bar	timestepsscale_model_input	is_tensorr;   typer.   floatfloat32float64int32int64r:   toexpandsampler   out_channelsstepprev_sampleXLA_AVAILABLExm	mark_stepchunkr   scaling_factordecodeclampcpupermutenumpynumpy_to_pilmaybe_free_model_hooksr   )r$   r3   r4   r5   r6   r7   r8   
batch_sizelatent_sizelatent_channelslatentslatent_model_input
class_nullclass_labels_inputthalfrP   is_mpsis_npur<   
noise_predepsrestcond_eps
uncond_epshalf_epsmodel_output_sampless                                r)   __call__zDiTPipeline.__call__g   sJ   x &
&&--99**11==[I))""((	
 :H!9KUYYy1}5QX||L9O9OPXXY[\\\4&:"5d>T>TU
IWZ[I[UYYj'A1Eam 	$$%89""4>>#;#;< .	A!)*HC0B,Cq,HI%*YYd|%C"!%!A!ABTVW!XI??9- ,22775@+22775@i/.4EMMU]]E,2fEKK5;;E!LL)EJ\JcJcd	Y__%*%dO../A/H/HI	!(();)A)A!)DEI))"YEW * f 
 !&q*:?*:':;Z?K[H[=\T',{{3CA1'M$*%(Z:O(PPii8 4!<"YYT{:
 &&33q8OK"'++j/q"Qa) "&!4!4\1FX!Y!e!e].	` A+11!1;JGQ(Gdhhoo444w>((//'*11Q;$++Aq1 ++-''1a399;AAC%''0G 	##%:"'22r*   )N)g      @N2   rC   T)__name__
__module____qualname____doc__model_cpu_offload_seqr
   r	   r   r   r   r   strr   r   r   r2   rJ   no_gradrT   	Generatorboolr   r   r   __classcell__)r(   s   @r)   r   r   (   s   " / .2<*< < -	<
 4S>*<$/5d3i#8 /T#Y /2 U]]_ !$MQ#%%* S33iS3 S3 E%//43H"HIJ	S3
 !S3 c]S3 S3 
"E)	*S3 S3r*   r   )typingr   r   r   r   r   rJ   modelsr	   r
   
schedulersr   utilsr   utils.torch_utilsr   pipeline_utilsr   r   torch_xla.core.xla_modelcore	xla_modelr`   r_   r    r*   r)   <module>r      sF   * 6 5  : 3 + - C ))MMS3# S3r*   