
import inspect
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
from transformers import (
    CLIPImageProcessor,
    CLIPTextModel,
    CLIPTextModelWithProjection,
    CLIPTokenizer,
    CLIPVisionModelWithProjection,
)

from ...image_processor import PipelineImageInput
from ...loaders import (
    FromSingleFileMixin,
    IPAdapterMixin,
    StableDiffusionXLLoraLoaderMixin,
    TextualInversionLoaderMixin,
)
from ...models import AutoencoderKL, ImageProjection, MotionAdapter, UNet2DConditionModel, UNetMotionModel
from ...models.attention_processor import AttnProcessor2_0, FusedAttnProcessor2_0, XFormersAttnProcessor
from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import (
    DDIMScheduler,
    DPMSolverMultistepScheduler,
    EulerAncestralDiscreteScheduler,
    EulerDiscreteScheduler,
    LMSDiscreteScheduler,
    PNDMScheduler,
)
from ...utils import (
    USE_PEFT_BACKEND,
    is_torch_xla_available,
    logging,
    replace_example_docstring,
    scale_lora_layers,
    unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
from ...video_processor import VideoProcessor
from ..free_init_utils import FreeInitMixin
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from .pipeline_output import AnimateDiffPipelineOutput


if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False

logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

EXAMPLE_DOC_STRING = """
    Examples:
        ```py
        >>> import torch
        >>> from diffusers.models import MotionAdapter
        >>> from diffusers import AnimateDiffSDXLPipeline, DDIMScheduler
        >>> from diffusers.utils import export_to_gif

        >>> adapter = MotionAdapter.from_pretrained(
        ...     "a-r-r-o-w/animatediff-motion-adapter-sdxl-beta", torch_dtype=torch.float16
        ... )

        >>> model_id = "stabilityai/stable-diffusion-xl-base-1.0"
        >>> scheduler = DDIMScheduler.from_pretrained(
        ...     model_id,
        ...     subfolder="scheduler",
        ...     clip_sample=False,
        ...     timestep_spacing="linspace",
        ...     beta_schedule="linear",
        ...     steps_offset=1,
        ... )
        >>> pipe = AnimateDiffSDXLPipeline.from_pretrained(
        ...     model_id,
        ...     motion_adapter=adapter,
        ...     scheduler=scheduler,
        ...     torch_dtype=torch.float16,
        ...     variant="fp16",
        ... ).to("cuda")

        >>> # enable memory savings
        >>> pipe.enable_vae_slicing()
        >>> pipe.enable_vae_tiling()

        >>> output = pipe(
        ...     prompt="a panda surfing in the ocean, realistic, high quality",
        ...     negative_prompt="low quality, worst quality",
        ...     num_inference_steps=20,
        ...     guidance_scale=8,
        ...     width=1024,
        ...     height=1024,
        ...     num_frames=16,
        ... )

        >>> frames = output.frames[0]
        >>> export_to_gif(frames, "animation.gif")
        ```
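
        FreeInit (via [`~pipelines.free_init_utils.FreeInitMixin`], which this pipeline inherits) can optionally be
        enabled to improve temporal consistency at the cost of extra denoising iterations. A minimal sketch, with
        illustrative (not tuned) argument values:

        ```py
        >>> # optional: iterative noise re-initialization for smoother motion
        >>> pipe.enable_free_init(num_iters=3, use_fast_sampling=False)
        >>> output = pipe(prompt="a panda surfing in the ocean", num_frames=16)
        >>> pipe.disable_free_init()
        ```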
"""


# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
    r"""
    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
    Flawed](https://huggingface.co/papers/2305.08891).
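
    Concretely, the guided prediction is rescaled toward the per-sample standard deviation of the text-conditional
    prediction and blended back with the original:
    `noise_cfg = guidance_rescale * noise_cfg * (std_text / std_cfg) + (1 - guidance_rescale) * noise_cfg`.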

    Args:
        noise_cfg (`torch.Tensor`):
            The predicted noise tensor for the guided diffusion process.
        noise_pred_text (`torch.Tensor`):
            The predicted noise tensor for the text-guided diffusion process.
        guidance_rescale (`float`, *optional*, defaults to 0.0):
            A rescale factor applied to the noise predictions.

    Returns:
        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
    """
    std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
    std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
    # rescale the results from guidance (fixes overexposure)
    noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
    # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images
    noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
    return noise_cfg


# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps
def retrieve_timesteps(
    scheduler,
    num_inference_steps: Optional[int] = None,
    device: Optional[Union[str, torch.device]] = None,
    timesteps: Optional[List[int]] = None,
    sigmas: Optional[List[float]] = None,
    **kwargs,
):
    r"""
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
            must be `None`.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
        timesteps (`List[int]`, *optional*):
            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
            `num_inference_steps` and `sigmas` must be `None`.
        sigmas (`List[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.

    Returns:
        `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
    zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesrD   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)rD   rC   rE   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)rE   rC   rC    )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__rD   len)	schedulerrB   rC   rD   rE   kwargsaccepts_timestepsaccept_sigmass           r?   retrieve_timestepsrU      s   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	 	M)FMfM''	!)n ))) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	 	GvfGG''	!)n ))) 	 	 3MFMfM''	)))rA   c            K           e Zd ZdZdZg dZg dZ	 	 	 dXdeded	e	d
e
de
deeef   dedeeeeeeef   dededef fdZ	 	 	 	 	 	 	 	 	 	 	 	 dYdedee   deej8                     dededee   dee   deej<                     deej<                     deej<                     deej<                     dee   d ee   fd!Z dZd"Z!d# Z"d$ Z#d% Z$	 	 	 	 	 	 	 d[d&Z%	 dZd'Z&	 dZd(Z'd) Z(d*ejR                  fd+ej<                  d,ed-ejT                  d.ej<                  fd/Z+e,d0        Z-e,d1        Z.e,d2        Z/e,d3        Z0e,d4        Z1e,d5        Z2e,d6        Z3e,d7        Z4 ejj                          e6e7      ddd8ddd9dddd:dddd;ddddddddd<ddd;dd=ddd=dddd>gf#deee8e   f   deeee8e   f      d?ed@ee   dAee   dBedCe8e   dDe8e   dEee   dFedeeee8e   f      deeee8e   f      dee   dGedHeeejr                  e8ejr                     f      d>eej<                     deej<                     deej<                     deej<                     deej<                     dIee:   dJee8ej<                        dKee   dLedMee;ee<f      dNedOee=eef      dPe=eef   dQee=eef      dRee=eef      dSe=eef   dTee=eef      d ee   dUee>eee;gdf      dVe8e   fFdW              Z? xZ@S )\AnimateDiffSDXLPipelinea
  
    Pipeline for text-to-video generation using Stable Diffusion XL.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

    The pipeline also inherits the following loading methods:
        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
        - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
        - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
        - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
        - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
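
    A minimal sketch of attaching an IP-Adapter to this pipeline (the repository id, subfolder, and weight name are
    illustrative; substitute the SDXL IP-Adapter checkpoint you actually use):

    ```py
    pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
    pipe.set_ip_adapter_scale(0.6)
    ```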

    Args:
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
        text_encoder ([`CLIPTextModel`]):
            Frozen text-encoder. Stable Diffusion XL uses the text portion of
            [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
            the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
        text_encoder_2 ([`CLIPTextModelWithProjection`]):
            Second frozen text-encoder. Stable Diffusion XL uses the text and pool portion of
            [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection),
            specifically the
            [laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)
            variant.
        tokenizer (`CLIPTokenizer`):
            Tokenizer of class
            [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
        tokenizer_2 (`CLIPTokenizer`):
            Second Tokenizer of class
            [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
        unet ([`UNet2DConditionModel`]):
            Conditional U-Net architecture to denoise the encoded image latents.
        scheduler ([`SchedulerMixin`]):
            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
        force_zeros_for_empty_prompt (`bool`, *optional*, defaults to `True`):
            Whether the negative prompt embeddings shall be forced to always be set to 0. Also see the config of
            `stabilityai/stable-diffusion-xl-base-1-0`.
    """

    model_cpu_offload_seq = "text_encoder->text_encoder_2->image_encoder->unet->vae"
    _optional_components = [
        "tokenizer",
        "tokenizer_2",
        "text_encoder",
        "text_encoder_2",
        "image_encoder",
        "feature_extractor",
    ]
    _callback_tensor_inputs = [
        "latents",
        "prompt_embeds",
        "negative_prompt_embeds",
        "add_text_embeds",
        "add_time_ids",
        "negative_pooled_prompt_embeds",
        "negative_add_time_ids",
    ]

    def __init__(
        self,
        vae: AutoencoderKL,
        text_encoder: CLIPTextModel,
        text_encoder_2: CLIPTextModelWithProjection,
        tokenizer: CLIPTokenizer,
        tokenizer_2: CLIPTokenizer,
        unet: Union[UNet2DConditionModel, UNetMotionModel],
        motion_adapter: MotionAdapter,
        scheduler: Union[
            DDIMScheduler,
            PNDMScheduler,
            LMSDiscreteScheduler,
            EulerDiscreteScheduler,
            EulerAncestralDiscreteScheduler,
            DPMSolverMultistepScheduler,
        ],
        image_encoder: CLIPVisionModelWithProjection = None,
        feature_extractor: CLIPImageProcessor = None,
        force_zeros_for_empty_prompt: bool = True,
    ):
        super().__init__()

        # a plain 2D UNet is wrapped into a motion-aware UNet using the provided motion adapter
        if isinstance(unet, UNet2DConditionModel):
            unet = UNetMotionModel.from_unet2d(unet, motion_adapter)

        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            text_encoder_2=text_encoder_2,
            tokenizer=tokenizer,
            tokenizer_2=tokenizer_2,
            unet=unet,
            motion_adapter=motion_adapter,
            scheduler=scheduler,
            image_encoder=image_encoder,
            feature_extractor=feature_extractor,
        )
        self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
        self.vae_scale_factor = (
            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
        )
        self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor)

        self.default_sample_size = (
            self.unet.config.sample_size
            if hasattr(self, "unet") and self.unet is not None and hasattr(self.unet.config, "sample_size")
            else 128
        )

    def encode_prompt(
        self,
        prompt: str,
        prompt_2: Optional[str] = None,
        device: Optional[torch.device] = None,
        num_videos_per_prompt: int = 1,
        do_classifier_free_guidance: bool = True,
        negative_prompt: Optional[str] = None,
        negative_prompt_2: Optional[str] = None,
        prompt_embeds: Optional[torch.Tensor] = None,
        negative_prompt_embeds: Optional[torch.Tensor] = None,
        pooled_prompt_embeds: Optional[torch.Tensor] = None,
        negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
        lora_scale: Optional[float] = None,
        clip_skip: Optional[int] = None,
    ):
        r"""
        Encodes the prompt into text encoder hidden states.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
            prompt_2 (`str` or `List[str]`, *optional*):
                The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
                used in both text-encoders
            device (`torch.device`):
                torch device
            num_videos_per_prompt (`int`):
                number of videos that should be generated per prompt
            do_classifier_free_guidance (`bool`):
                whether to use classifier free guidance or not
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            negative_prompt_2 (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
                `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
            pooled_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
                If not provided, pooled text embeddings will be generated from `prompt` input argument.
            negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
                input argument.
            lora_scale (`float`, *optional*):
                A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
            clip_skip (`int`, *optional*):
                Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                the output of the pre-final layer will be used for computing the prompt embeddings.
        Nr   
max_lengthTpt)paddingr   
truncationreturn_tensorslongest)r   r   r0   z\The following part of your input was truncated because CLIP can only handle sequences up to z	 tokens: output_hidden_statesr,   r3    z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)dtyperC   )(_execution_devicerp   r   _lora_scalerZ   r$   r   r(   r[   strrP   shaperX   rY   zipr   maybe_convert_promptmodel_max_length	input_idstorchequalbatch_decodeloggerwarningtor8   hidden_statesappendconcatru   rh   
zeros_liketype	TypeErrorrH   r   rf   repeatviewr)   )!rz   r{   r|   rC   r}   r~   r   r   r_   r`   r   rc   r   r   
batch_size
tokenizerstext_encodersprompt_embeds_listpromptsrX   rZ   text_inputstext_input_idsuntruncated_idsremoved_textzero_out_negative_promptuncond_tokensnegative_prompt_embeds_listr   uncond_inputbs_embedseq_len_s!                                    r?   encode_promptz%AnimateDiffSDXLPipeline.encode_promptI  s   t 1411 !j7W&X)D   ,'243D3DjQ%d&7&7D"".'243F3F
S%d&9&9:F'4&&VJ&,,Q/J <@>>;Udnnd&6&67\`\l\l[m
8<8I8I8UT 3 34\`\o\o[p 	  )6H%/#%>zHH "$x(G36w
M3Z $9/	<d$?@!66vyIF'((99##' "-!6!6"+FIVZ"["e"e"((,0D0DR0HHQVQ\Q\"OR $-#9#9/!YMgMgjkMknpMpJp:q#rLNN%667yP
 !-^->->v-F]a b (/M!4D4I4IQ4N+8+;($$1$?$?$CM %2$?$?)a-@P$QM"))-8I$9L "LL);DM $3d#:#gt{{?g?g &+A+INf%*%5%5m%D",1,<,<=Q,R)(-C-K-3O 1 D_ AK?\_@`jO+<<fuO4>?PRU4V
/00\m 
 !d6l$:O&OUVZ[jVkUl mV~Q(  s?33 )/)::J3K_J` ax/
| <33  "12C D*,'<?z[h<i K8Ld$?@&*&?&?QZ&[O*003
(#()##'  *6 **--f5)-*& 18=STU=V=[=[_`=`4J14M1)?)M)Mb)Q&+223IJ/K2 &+\\2MSU%V"*),,43F3F3L3LU[,\M),,499??6,RM,22'1%,,Q0EqI%**86K+KWVXY&,2215G"".)?)B)BI\I\IbIbkq)B)r&)?)B)Bag)B)h&%;%B%B1F[]^%_"%;%@%@NcAcelnp%q"3::1>STYY,,b 
 ',I,P,PQRTi,j,o,o00"-) ($ @AFV#D$5$5zB*$ @AFV#D$7$7D46JLiiirA   c                 |   t        | j                  j                               j                  }t	        |t
        j                        s| j                  |d      j                  }|j                  ||      }|r}| j                  |d      j                  d   }|j                  |d      }| j                  t        j                  |      d      j                  d   }|j                  |d      }||fS | j                  |      j                  }|j                  |d      }t        j                  |      }	||	fS )	Nr   )r   rC   r   Tr   r   r   r   )nextr\   rM   r   rp   r   Tensorr]   pixel_valuesr   r   repeat_interleaver   image_embeds)
rz   imagerC   num_images_per_promptr   r   image_enc_hidden_statesuncond_image_enc_hidden_statesr   uncond_image_embedss
             r?   encode_imagez$AnimateDiffSDXLPipeline.encode_image8  sD   T''2245;;%.**5*FSSEe4&*&8&8UY&8&Z&h&hik&l#&=&O&OPekl&O&m#-1-?-?  'd .@ .mB. * .L-]-]%1 .^ .* +,JJJ--e4AAL'99:OUV9WL"'"2"2<"@!444rA   c                    g }|rg }|t        |t              s|g}t        |      t        | j                  j                  j
                        k7  rBt        dt        |       dt        | j                  j                  j
                         d      t        || j                  j                  j
                        D ]`  \  }}	t        |	t               }
| j                  ||d|
      \  }}|j                  |d d d f          |sIj                  |d d d f          b n?|D ]:  }|r%|j                  d      \  }}j                  |       |j                  |       < g }t        |      D ]|  \  }}t        j                  |g|z  d      }|r7t        j                  |   g|z  d      }t        j                  ||gd      }|j                  |      }|j                  |       ~ |S )	NzK`ip_adapter_image` must have same length as the number of IP Adapters. Got z images and z IP Adapters.r0   r,   r   r   )rC   )rp   r6   rP   rf   encoder_hid_projimage_projection_layersrH   r   r   r   r   chunk	enumerater   catr   )rz   ip_adapter_imageip_adapter_image_embedsrC   r   r~   r   negative_image_embedssingle_ip_adapter_imageimage_proj_layeroutput_hidden_statesingle_image_embedssingle_negative_image_embedsis                 r?   prepare_ip_adapter_image_embedsz7AnimateDiffSDXLPipeline.prepare_ip_adapter_image_embedsQ  sY    &$&!"*.5$4#5 #$DII,F,F,^,^(__ abefvbwax  yE  FI  JN  JS  JS  Jd  Jd  J|  J|  F}  E~  ~K  L  >A $))"<"<"T"T> 
X9')9 +55E*W&W#DHDUDU+VQ8KEA#%A ##$7a$@A.)001MdTUg1VW
X (? 9#.H[HaHabcHdE02E)001MN##$78	9 #%&/&= 	@"A""'))-@,ADY,Y_`"a*/4yy:OPQ:R9SVk9kqr/s,&+ii1MOb0cij&k#"5"8"8"8"G#**+>?	@ '&rA   c                    d| j                   j                  j                  z  |z  }|j                  \  }}}}}|j	                  ddddd      j                  ||z  |||      }| j                   j                  |      j                  }|d d d f   j                  ||df|j                  dd  z         j	                  ddddd      }|j                         }|S )Nr0   r   r,   r      r   )	re   ru   scaling_factorr   permutereshapedecodesamplefloat)	rz   r^   r   channels
num_framesheightwidthr   videos	            r?   decode_latentsz&AnimateDiffSDXLPipeline.decode_latents  s    dhhoo444w>:A--7
Hj&%//!Q1a088j9PRZ\bdij(//dAg&&
J'CekkRSRTo'UV^^_`bcefhiklmrA   c                 V   dt        t        j                  | j                  j                        j
                  j                               v }i }|r||d<   dt        t        j                  | j                  j                        j
                  j                               v }|r||d<   |S )Neta	generator)rI   rJ   rK   rQ   steprM   rN   )rz   r   r   accepts_etaextra_step_kwargsaccepts_generators         r?   prepare_extra_step_kwargsz1AnimateDiffSDXLPipeline.prepare_extra_step_kwargs  s     s7#4#4T^^5H5H#I#T#T#Y#Y#[\\'*e$ (3w/@/@ATAT/U/`/`/e/e/g+hh-6k*  rA   c           
      f    |dz  dk7  s|dz  dk7  rt        d| d| d      |Lt         fd|D              s8t        d j                   d|D cg c]  }| j                  vs| c}       ||t        d	| d
| d      ||t        d| d
| d      ||t        d      |7t        |t              s't        |t
              st        dt        |             |7t        |t              s't        |t
              st        dt        |             ||t        d| d| d      ||t        d| d| d      |A|?|j                  |j                  k7  r&t        d|j                   d|j                   d      ||	t        d      ||
t        d      y y c c}w )Nrj   r   z7`height` and `width` have to be divisible by 8 but are z and r   c              3   :   K   | ]  }|j                   v   y wN)_callback_tensor_inputs).0krz   s     r?   	<genexpr>z7AnimateDiffSDXLPipeline.check_inputs.<locals>.<genexpr>  s#      F
23A---F
s   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.z Cannot forward both `prompt_2`: zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z4`prompt_2` has to be of type `str` or `list` but is z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: z)Cannot forward both `negative_prompt_2`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` zIf `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`.zIf `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`.)rH   allr   rp   r   r6   r   r   )rz   r{   r|   r   r   r   r   r_   r`   r   rc   "callback_on_step_end_tensor_inputsr   s   `            r?   check_inputsz$AnimateDiffSDXLPipeline.check_inputs  s    A:?eai1nVW]V^^cdicjjklmm-9# F
7YF
 C
 DTEaEaDbbn  |^  pHvw  bc  ko  kG  kG  bGpq  pH  oI  J  -";08N}o ^0 0  !m&?28*<RS`Ra b0 0  ^ 5w  FC)@TZ\`IaQRVW]R^Q_`aa!:h+DZX`bfMgSTXYaTbScdee&+A+M9/9J K*++]_  */E/Q;<M;N O*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8  $)=)E U  "-2O2W y  3X-] pHs   F.%F.c
                 2   ||||| j                   z  || j                   z  f}
t        |t              r)t        |      |k7  rt	        dt        |       d| d      |	t        |
|||      }	n|	j                  |      }	|	| j                  j                  z  }	|	S )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r   rC   r   )	rk   rp   r6   rP   rH   r*   r   rQ   init_noise_sigma)rz   r   num_channels_latentsr   r   r   r   rC   r   r^   r   s              r?   prepare_latentsz'AnimateDiffSDXLPipeline.prepare_latents  s      d+++T***
 i&3y>Z+GA#i.AQ R&<'gi 
 ?"5IfTYZGjj(G DNN;;;rA   c                 8   t        ||z   |z         }| j                  j                  j                  t	        |      z  |z   }| j                  j
                  j                  j                  }||k7  rt        d| d| d      t        j                  |g|      }|S )Nz7Model expects an added time embedding vector of length z, but a vector of z was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`.r   )r6   rf   ru   addition_time_embed_dimrP   add_embeddinglinear_1in_featuresrH   r   tensor)	rz   original_sizecrops_coords_top_lefttarget_sizer   text_encoder_projection_dimrb   passed_add_embed_dimexpected_add_embed_dims	            r?   _get_add_time_idsz)AnimateDiffSDXLPipeline._get_add_time_ids  s     M,AAKOP II44s<7HHKff 	 "&!8!8!A!A!M!M!%99IJ`Iaas  uI  tJ  JU  V  ||\N%@rA   c                 0   | j                   j                  }| j                   j                  t        j                         t        | j                   j                  j                  j                  d   j                  t        t        t        f      }|r| j                   j                  j                  |       | j                   j                  j                  j                  |       | j                   j                  j                  j                  |       y y )Nr   r   )re   r   r   r   float32rp   decoder	mid_block
attentions	processorr   r   r   post_quant_convconv_in)rz   r   use_torch_2_0_or_xformerss      r?   
upcast_vaez"AnimateDiffSDXLPipeline.upcast_vae  s    %--($.HH&&11!4>> %%%
! %HH$$''.HH$$''.HH&&))%0 %rA   i   wembedding_dimr   returnc                 l   t        |j                        dk(  sJ |dz  }|dz  }t        j                  t        j                  d            |dz
  z  }t        j
                  t        j                  ||      | z        }|j                  |      dddf   |dddf   z  }t        j                  t        j                  |      t        j                  |      gd      }|dz  dk(  r*t        j                  j                  j                  |d      }|j                  |j                  d	   |fk(  sJ |S )
a  
        See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

        Args:
            w (`torch.Tensor`):
                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
            embedding_dim (`int`, *optional*, defaults to 512):
                Dimension of the embeddings to generate.
            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
                Data type of the generated embeddings.

        Returns:
            `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
        """
        assert len(w.shape) == 1
        w = w * 1000.0

        half_dim = embedding_dim // 2
        emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
        emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
        emb = w.to(dtype)[:, None] * emb[None, :]
        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
        if embedding_dim % 2 == 1:  # zero pad
            emb = torch.nn.functional.pad(emb, (0, 1))
        assert emb.shape == (w.shape[0], embedding_dim)
        return emb

    @property
    def guidance_scale(self):
        return self._guidance_scale

    @property
    def guidance_rescale(self):
        return self._guidance_rescale

    @property
    def clip_skip(self):
        return self._clip_skip

    # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
    # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
    # corresponds to doing no classifier free guidance.
    @property
    def do_classifier_free_guidance(self):
        return self._guidance_scale > 1 and self.unet.config.time_cond_proj_dim is None

    @property
    def cross_attention_kwargs(self):
        return self._cross_attention_kwargs

    @property
    def denoising_end(self):
        return self._denoising_end

    @property
    def num_timesteps(self):
        return self._num_timesteps

    @property
    def interrupt(self):
        return self._interrupt

    @torch.no_grad()
    @replace_example_docstring(EXAMPLE_DOC_STRING)
    def __call__(
        self,
        prompt: Union[str, List[str]] = None,
        prompt_2: Optional[Union[str, List[str]]] = None,
        num_frames: int = 16,
        height: Optional[int] = None,
        width: Optional[int] = None,
        num_inference_steps: int = 50,
        timesteps: List[int] = None,
        sigmas: List[float] = None,
        denoising_end: Optional[float] = None,
        guidance_scale: float = 5.0,
        negative_prompt: Optional[Union[str, List[str]]] = None,
        negative_prompt_2: Optional[Union[str, List[str]]] = None,
        num_videos_per_prompt: Optional[int] = 1,
        eta: float = 0.0,
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
        latents: Optional[torch.Tensor] = None,
        prompt_embeds: Optional[torch.Tensor] = None,
        negative_prompt_embeds: Optional[torch.Tensor] = None,
        pooled_prompt_embeds: Optional[torch.Tensor] = None,
        negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
        ip_adapter_image: Optional[PipelineImageInput] = None,
        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
        guidance_rescale: float = 0.0,
        original_size: Optional[Tuple[int, int]] = None,
        crops_coords_top_left: Tuple[int, int] = (0, 0),
        target_size: Optional[Tuple[int, int]] = None,
        negative_original_size: Optional[Tuple[int, int]] = None,
        negative_crops_coords_top_left: Tuple[int, int] = (0, 0),
        negative_target_size: Optional[Tuple[int, int]] = None,
        clip_skip: Optional[int] = None,
        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
        callback_on_step_end_tensor_inputs: List[str] = ["latents"],
    ):
        r"""
        Function invoked when calling the pipeline for generation.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide the video generation. If not defined, one has to pass `prompt_embeds`
                instead.
            prompt_2 (`str` or `List[str]`, *optional*):
                The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
                used in both text-encoders
            num_frames (`int`, *optional*, defaults to 16):
                The number of video frames that are generated. Defaults to 16 frames, which at 8 frames per second
                amounts to 2 seconds of video.
            height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The height in pixels of the generated video. This is set to 1024 by default for the best results.
                Anything below 512 pixels won't work well for
                [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
                and checkpoints that are not specifically fine-tuned on low resolutions.
            width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The width in pixels of the generated video. This is set to 1024 by default for the best results.
                Anything below 512 pixels won't work well for
                [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
                and checkpoints that are not specifically fine-tuned on low resolutions.
            num_inference_steps (`int`, *optional*, defaults to 50):
                The number of denoising steps. More denoising steps usually lead to a higher quality video at the
                expense of slower inference.
            timesteps (`List[int]`, *optional*):
                Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
                in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
                passed will be used. Must be in descending order.
            sigmas (`List[float]`, *optional*):
                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                will be used.
            denoising_end (`float`, *optional*):
                When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
                completed before it is intentionally prematurely terminated. As a result, the returned sample will
                still retain a substantial amount of noise as determined by the discrete timesteps selected by the
                scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
                "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
                Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
            guidance_scale (`float`, *optional*, defaults to 5.0):
                Guidance scale as defined in [Classifier-Free Diffusion
                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
                the text `prompt`, usually at the expense of lower video quality.
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the video generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            negative_prompt_2 (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the video generation to be sent to `tokenizer_2` and
                `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
            num_videos_per_prompt (`int`, *optional*, defaults to 1):
                The number of videos to generate per prompt.
            eta (`float`, *optional*, defaults to 0.0):
                Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
                applies to [`schedulers.DDIMScheduler`], will be ignored for others.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for video
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
            pooled_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
                If not provided, pooled text embeddings will be generated from `prompt` input argument.
            negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
                input argument.
            ip_adapter_image (`PipelineImageInput`, *optional*):
                Optional image input to work with IP Adapters.
            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
                Pre-generated image embeddings for IP-Adapter. If not provided, embeddings are computed from the
                `ip_adapter_image` input argument.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generated video. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.stable_diffusion_xl.AnimateDiffPipelineOutput`] instead of a
                plain tuple.
            cross_attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                `self.processor` in
                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            guidance_rescale (`float`, *optional*, defaults to 0.0):
                Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
                Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
                [Common Diffusion Noise Schedules and Sample Steps are
                Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
                using zero terminal SNR.
            original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
                If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
                `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
                explained in section 2.2 of
                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
            crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
                `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
                `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
                `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
            target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
                For most cases, `target_size` should be set to the desired height and width of the generated image. If
                not specified it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
                section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
            negative_original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
                To negatively condition the generation process based on a specific image resolution. Part of SDXL's
                micro-conditioning as explained in section 2.2 of
                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
                information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
            negative_crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
                To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
                micro-conditioning as explained in section 2.2 of
                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
                information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
            negative_target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
                To negatively condition the generation process based on a target image resolution. It should be the same
                as the `target_size` for most cases. Part of SDXL's micro-conditioning as explained in section 2.2 of
                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
                information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
            callback_on_step_end (`Callable`, *optional*):
                A function called at the end of each denoising step during inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
                `callback_on_step_end_tensor_inputs`.
            callback_on_step_end_tensor_inputs (`List`, *optional*):
                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                `._callback_tensor_inputs` attribute of your pipeline class.

        Examples:

        Returns:
            [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] is
                returned, otherwise a `tuple` is returned where the first element is a list with the generated frames.
        r0   FNr   scale)r{   r|   rC   r}   r~   r   r   r_   r`   r   rc   r   r   r   )r   r  r   )repeatsr3   c                     | k\  S r   rG   )tsdiscrete_timestep_cutoffs    r?   <lambda>z2AnimateDiffSDXLPipeline.__call__.<locals>.<lambda>  s    RC[=[ rA   )r  r   )totalr,   )text_embedstime_idsr   )encoder_hidden_statestimestep_condr/  added_cond_kwargsr?  r<  )r;   r?  r^   r_   r`   ra   rc   rb   rd   latent)r   r>  r   )frames)Ory   rk   r   r#  r'  r)  r.  r1  r8  rp   r   r6   rP   r   r   r/  getr   r~   r   rU   rQ   rf   ru   in_channelsr   r   r   r[   intprojection_dimr  r   r   r   r   r   r   r2  r   roundnum_train_timestepsfilterr,  r  r%  r!  free_init_enabled_free_init_num_itersr7   _apply_free_initr5  progress_barr   r9  scale_model_inputr   r;   r@   r   localspopupdateXLA_AVAILABLExm	mark_stepre   float16force_upcastr  r   iterr  rM   r   rw   postprocess_videomaybe_free_model_hooksr1   )Arz   r{   r|   r   r   r   rB   rD   rE   r2  r%  r   r   r}   r   r   r^   r_   r`   r   rc   r   r   r>  r?  r/  r;   r  r  r  r@  rA  rB  r   rC  r   r   rC   r   r   r   ra   r  rb   rd   r   rO  guidance_scale_tensornum_free_init_itersfree_init_iterr]  r   tlatent_model_inputrP  
noise_prednoise_pred_uncondr:   callback_kwargsr   callback_outputsneeds_upcastingr   video_tensorrI  sA                                                                   @r?   __call__z AnimateDiffSDXLPipeline.__call__e  s   z K433d6K6KKI11D4I4II !%8&%!4fe_ 	" ).	
  .!1#'=$+ *VS"9JJvt$<VJ&,,Q/J'' ?C>Y>Y>eD''++GT:ko 	 "7(,(H(H+/'#9!5*G!nn  
	
" )$ *<NN/F*
&	&
  $yy//;;&&.. 

 !::9cJ /&*-.B.H.H.L*M'*.*=*=*D*D*S*S'--!%%(C . 
 "-2F2R$($:$:&.$#)),G %; %! %1!++!II'=}&MSTUM#ii)F(X^_`O 99&;\%JPQRL%77
PQ7R%((0),,V4#v.55jCX6XZ[\'+B+N?? '2200L *4--u5""Q&""Q&'*NN))==))DNN,A,A,U,UUW($ #&d62[]f+g&h"i!"6#67I 99..:$)LL1D1Dq1H$I$P$PQ[^sQs$t! ==%TYY5E5E5X5X > bgmmb4  <@;Q;Qd77WX#$78 C	'N%%%)%:%:^-@&'--Yb&" #&i.D "")<)<"= :'%i0 9'DAq~~  FJEeEeG9q=)Akr&)-)I)IJ\^_)`& 9HUa(b%'37N<H).9!%*.;&3/3/J/J*;$) "+ " "J 77=G=M=Ma=P:)?%69L9LP_bsPs9t%t
77D<Q<QTW<W%6&$J_J_&

 2dnn11*aqL]qkpqrstG+7*,!C =A17!OA.=+?aO+\("2"6"6y'"J(8(<(<_m(\1A1E1EF^`v1w.*:*>*>?PRa*b8H8L8L;=Z95 (8';';NL'Y0@0D0DE\^s0t- '')$s9':' :'C	'L ((..EMM9Zdhhoo>Z>ZOOjjd488+C+C+N+N+P&Q!R!X!XYG ("E..w7L((::[f:gE HHKKemmK, 	##%8O(66i:' :'s   
G`:*`::a	)NNT)NNr0   TNNNNNNNNr   )NNNNNNN)A__name__
__module____qualname____doc__model_cpu_offload_seq_optional_componentsr   r   r   r   r   r	   r   r   r   r   r#   r"   r!   r    r   r   r
   boolro   r   r   r   rC   rU  r   r   r   r   r   r   r   r   r   r  r  r
  r   r!  propertyr%  r;   r   r~   r/  r2  r6  r9  no_gradr'   EXAMPLE_DOC_STRINGr   	Generatorr   r   r   r   r   ru  __classcell__)rO   s   @r?   rW   rW      sK   (T U6 8<04-1'.
.
 $.
 4	.

 !.
 #.
 (/9:.
 &.
  "+')
.
" 5#.
$ .%.
& '+'.
h #')-%&,0)-+/049=7;@D&*#'ljlj 3-lj &	lj
  #lj &*lj "#lj $C=lj  -lj !) 6lj 'u||4lj (0'=lj UOlj C=lj^52+'\
!. #!&*+/FT nr4 ei$1( 58emm.1@E	< $ $ & &   X X , , # # # #   U]]_12 )-48 $##%#")- #;?=A/0MQ*.049=7;@D9=@D%* ;?"%371715<@:@:>#'KO9BIv7c49n%v7 5d3i01v7 	v7
 v7 }v7 !v7 9v7 Uv7  v7 v7 "%T#Y"78v7 $E#tCy.$9:v7  (}v7 v7  E%//43H"HIJ!v7" %,,'#v7$  -%v7& !) 6'v7( 'u||4)v7* (0'=+v7, ##56-v7. "*$u||*<!=/v70 c]1v72 3v74 !)c3h 85v76  7v78  c3h09v7:  %S#X;v7< eCHo.=v7> !)sCx 9?v7@ ).c3hAv7B 'uS#X7Cv7D C=Ev7F 'xc40@$0F'GHGv7H -1IIv7 3 v7rA   rW   )r<  )NNNN)LrJ   typingr   r   r   r   r   r   r	   r   transformersr
   r   r   r   r   image_processorr   loadersr   r   r   r   modelsr   r   r   r   r   models.attention_processorr   r   r   models.lorar   
schedulersr   r   r    r!   r"   r#   utilsr$   r%   r&   r'   r(   r)   utils.torch_utilsr*   rw   r+   free_init_utilsr-   pipeline_utilsr.   r/   pipeline_outputr1   torch_xla.core.xla_modelcore	xla_modelrc  rb  
get_loggerrv  r   r  r@   rU  r   rC   r   rU   rW   rG   rA   r?   <module>r     s(    D D D   2  k j 
 :   . - + D 6 ))MM			H	%. d: *.15%)$(8*!#8* U3,-.8* S	"	8*
 T%[!8*vK7$K7rA   