
import inspect
from typing import Any, Callable, Dict, List, Optional, Union

import torch
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

from ...image_processor import PipelineImageInput
from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
from ...models.lora import adjust_lora_scale_text_encoder
from ...models.unets.unet_motion_model import MotionAdapter
from ...schedulers import (
    DDIMScheduler,
    DPMSolverMultistepScheduler,
    EulerAncestralDiscreteScheduler,
    EulerDiscreteScheduler,
    LMSDiscreteScheduler,
    PNDMScheduler,
)
from ...utils import (
    USE_PEFT_BACKEND,
    deprecate,
    is_torch_xla_available,
    logging,
    replace_example_docstring,
    scale_lora_layers,
    unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
from ...video_processor import VideoProcessor
from ..free_init_utils import FreeInitMixin
from ..free_noise_utils import AnimateDiffFreeNoiseMixin
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from .pipeline_output import AnimateDiffPipelineOutput


if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False

logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

EXAMPLE_DOC_STRING = """
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import MotionAdapter, AnimateDiffPipeline, DDIMScheduler
        >>> from diffusers.utils import export_to_gif

        >>> adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
        >>> pipe = AnimateDiffPipeline.from_pretrained("frankjoshua/toonyou_beta6", motion_adapter=adapter)
        >>> pipe.scheduler = DDIMScheduler(beta_schedule="linear", steps_offset=1, clip_sample=False)
        >>> output = pipe(prompt="A corgi walking in the park")
        >>> frames = output.frames[0]
        >>> export_to_gif(frames, "animation.gif")
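        >>> # Optional sketch (not part of the original example): pass a seeded `generator`
        >>> # for reproducible output, e.g.
        >>> # output = pipe(prompt="A corgi walking in the park", generator=torch.Generator("cpu").manual_seed(42))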
        ```
"""


class AnimateDiffPipeline(
    DiffusionPipeline,
    StableDiffusionMixin,
    TextualInversionLoaderMixin,
    IPAdapterMixin,
    StableDiffusionLoraLoaderMixin,
    FreeInitMixin,
    AnimateDiffFreeNoiseMixin,
    FromSingleFileMixin,
):
    r"""
    Pipeline for text-to-video generation.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
    implemented for all pipelines (downloading, saving, running on a particular device, etc.).

    The pipeline also inherits the following loading methods:
        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
        - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
        - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
        - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
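
    A minimal loading sketch (illustrative only; the IP-Adapter checkpoint below is an example repository, not
    something this file depends on):

    ```py
    pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
    ```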

    Args:
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
        text_encoder ([`CLIPTextModel`]):
            Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)).
        tokenizer (`CLIPTokenizer`):
            A [`~transformers.CLIPTokenizer`] to tokenize text.
        unet ([`UNet2DConditionModel`]):
            A [`UNet2DConditionModel`] used to create a UNetMotionModel to denoise the encoded video latents.
        motion_adapter ([`MotionAdapter`]):
            A [`MotionAdapter`] to be used in combination with `unet` to denoise the encoded video latents.
        scheduler ([`SchedulerMixin`]):
            A scheduler to be used in combination with `unet` to denoise the encoded video latents. Can be one of
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
    """

    model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae"
    _optional_components = ["feature_extractor", "image_encoder", "motion_adapter"]
    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]

    def __init__(
        self,
        vae: AutoencoderKL,
        text_encoder: CLIPTextModel,
        tokenizer: CLIPTokenizer,
        unet: Union[UNet2DConditionModel, UNetMotionModel],
        motion_adapter: MotionAdapter,
        scheduler: Union[
            DDIMScheduler,
            PNDMScheduler,
            LMSDiscreteScheduler,
            EulerDiscreteScheduler,
            EulerAncestralDiscreteScheduler,
            DPMSolverMultistepScheduler,
        ],
        feature_extractor: CLIPImageProcessor = None,
        image_encoder: CLIPVisionModelWithProjection = None,
    ):
        super().__init__()
        if isinstance(unet, UNet2DConditionModel):
            unet = UNetMotionModel.from_unet2d(unet, motion_adapter)

        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            unet=unet,
            motion_adapter=motion_adapter,
            scheduler=scheduler,
            feature_extractor=feature_extractor,
            image_encoder=image_encoder,
        )
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
        self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt
    def encode_prompt(
        self,
        prompt,
        device,
        num_images_per_prompt,
        do_classifier_free_guidance,
        negative_prompt=None,
        prompt_embeds: Optional[torch.Tensor] = None,
        negative_prompt_embeds: Optional[torch.Tensor] = None,
        lora_scale: Optional[float] = None,
        clip_skip: Optional[int] = None,
    ):
        r"""
        Encodes the prompt into text encoder hidden states.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
            device (`torch.device`):
                torch device
            num_images_per_prompt (`int`):
                number of images that should be generated per prompt
            do_classifier_free_guidance (`bool`):
                whether to use classifier free guidance or not
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
            lora_scale (`float`, *optional*):
                A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
            clip_skip (`int`, *optional*):
                Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                the output of the pre-final layer will be used for computing the prompt embeddings.
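
        A minimal usage sketch (illustrative only; assumes a loaded `pipe` and a torch `device`, which are not
        defined here):

        ```py
        prompt_embeds, negative_prompt_embeds = pipe.encode_prompt(
            prompt="A corgi walking in the park",
            device=device,
            num_images_per_prompt=1,
            do_classifier_free_guidance=True,
            negative_prompt="low quality",
        )
        ```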
        """
        # set lora scale so that monkey patched LoRA
        # function of text encoder can correctly access it
        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
            self._lora_scale = lora_scale

            # dynamically adjust the LoRA scale
            if not USE_PEFT_BACKEND:
                adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
            else:
                scale_lora_layers(self.text_encoder, lora_scale)

        if prompt is not None and isinstance(prompt, str):
            batch_size = 1
        elif prompt is not None and isinstance(prompt, list):
            batch_size = len(prompt)
        else:
            batch_size = prompt_embeds.shape[0]

        if prompt_embeds is None:
            # textual inversion: process multi-vector tokens if necessary
            if isinstance(self, TextualInversionLoaderMixin):
                prompt = self.maybe_convert_prompt(prompt, self.tokenizer)

            text_inputs = self.tokenizer(
                prompt,
                padding="max_length",
                max_length=self.tokenizer.model_max_length,
                truncation=True,
                return_tensors="pt",
            )
            text_input_ids = text_inputs.input_ids
            untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids

            if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
                text_input_ids, untruncated_ids
            ):
                removed_text = self.tokenizer.batch_decode(
                    untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]
                )
                logger.warning(
                    "The following part of your input was truncated because CLIP can only handle sequences up to"
                    f" {self.tokenizer.model_max_length} tokens: {removed_text}"
                )

            if hasattr(self.text_encoder.config, "use_attention_mask") and self.text_encoder.config.use_attention_mask:
                attention_mask = text_inputs.attention_mask.to(device)
            else:
                attention_mask = None

            if clip_skip is None:
                prompt_embeds = self.text_encoder(text_input_ids.to(device), attention_mask=attention_mask)
                prompt_embeds = prompt_embeds[0]
            else:
                prompt_embeds = self.text_encoder(
                    text_input_ids.to(device), attention_mask=attention_mask, output_hidden_states=True
                )
                # Access the `hidden_states` first, that contains a tuple of
                # all the hidden states from the encoder layers. Then index into
                # the tuple to access the hidden states from the desired layer.
                prompt_embeds = prompt_embeds[-1][-(clip_skip + 1)]
                # We also need to apply the final LayerNorm here to not mess with the
                # representations. The `last_hidden_states` that we typically use for
                # obtaining the final prompt representations passes through the LayerNorm
                # layer.
                prompt_embeds = self.text_encoder.text_model.final_layer_norm(prompt_embeds)

        if self.text_encoder is not None:
            prompt_embeds_dtype = self.text_encoder.dtype
        elif self.unet is not None:
            prompt_embeds_dtype = self.unet.dtype
        else:
            prompt_embeds_dtype = prompt_embeds.dtype

        prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)

        bs_embed, seq_len, _ = prompt_embeds.shape
        # duplicate text embeddings for each generation per prompt, using mps friendly method
        prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
        prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)

        # get unconditional embeddings for classifier free guidance
        if do_classifier_free_guidance and negative_prompt_embeds is None:
            uncond_tokens: List[str]
            if negative_prompt is None:
                uncond_tokens = [""] * batch_size
            elif prompt is not None and type(prompt) is not type(negative_prompt):
                raise TypeError(
                    f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
                    f" {type(prompt)}."
                )
            elif isinstance(negative_prompt, str):
                uncond_tokens = [negative_prompt]
            elif batch_size != len(negative_prompt):
                raise ValueError(
                    f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
                    f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
                    " the batch size of `prompt`."
                )
            else:
                uncond_tokens = negative_prompt

            # textual inversion: process multi-vector tokens if necessary
            if isinstance(self, TextualInversionLoaderMixin):
                uncond_tokens = self.maybe_convert_prompt(uncond_tokens, self.tokenizer)

            max_length = prompt_embeds.shape[1]
            uncond_input = self.tokenizer(
                uncond_tokens,
                padding="max_length",
                max_length=max_length,
                truncation=True,
                return_tensors="pt",
            )

            if hasattr(self.text_encoder.config, "use_attention_mask") and self.text_encoder.config.use_attention_mask:
                attention_mask = uncond_input.attention_mask.to(device)
            else:
                attention_mask = None

            negative_prompt_embeds = self.text_encoder(
                uncond_input.input_ids.to(device),
                attention_mask=attention_mask,
            )
            negative_prompt_embeds = negative_prompt_embeds[0]

        if do_classifier_free_guidance:
            # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
            seq_len = negative_prompt_embeds.shape[1]

            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)

            negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
            negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

        if self.text_encoder is not None:
            if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
                # Retrieve the original scale by scaling back the LoRA layers
                unscale_lora_layers(self.text_encoder, lora_scale)

        return prompt_embeds, negative_prompt_embeds

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
    def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
        dtype = next(self.image_encoder.parameters()).dtype

        if not isinstance(image, torch.Tensor):
            image = self.feature_extractor(image, return_tensors="pt").pixel_values

        image = image.to(device=device, dtype=dtype)
        if output_hidden_states:
            image_enc_hidden_states = self.image_encoder(image, output_hidden_states=True).hidden_states[-2]
            image_enc_hidden_states = image_enc_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
            uncond_image_enc_hidden_states = self.image_encoder(
                torch.zeros_like(image), output_hidden_states=True
            ).hidden_states[-2]
            uncond_image_enc_hidden_states = uncond_image_enc_hidden_states.repeat_interleave(
                num_images_per_prompt, dim=0
            )
            return image_enc_hidden_states, uncond_image_enc_hidden_states
        else:
            image_embeds = self.image_encoder(image).image_embeds
            image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
            uncond_image_embeds = torch.zeros_like(image_embeds)

            return image_embeds, uncond_image_embeds

    def prepare_ip_adapter_image_embeds(
        self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
    ):
        image_embeds = []
        if do_classifier_free_guidance:
            negative_image_embeds = []
        if ip_adapter_image_embeds is None:
            if not isinstance(ip_adapter_image, list):
                ip_adapter_image = [ip_adapter_image]

            if len(ip_adapter_image) != len(self.unet.encoder_hid_proj.image_projection_layers):
                raise ValueError(
                    f"`ip_adapter_image` must have same length as the number of IP Adapters. Got"
                    f" {len(ip_adapter_image)} images and"
                    f" {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
                )

            for single_ip_adapter_image, image_proj_layer in zip(
                ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
            ):
                output_hidden_state = not isinstance(image_proj_layer, ImageProjection)
                single_image_embeds, single_negative_image_embeds = self.encode_image(
                    single_ip_adapter_image, device, 1, output_hidden_state
                )

                image_embeds.append(single_image_embeds[None, :])
                if do_classifier_free_guidance:
                    negative_image_embeds.append(single_negative_image_embeds[None, :])
        else:
            for single_image_embeds in ip_adapter_image_embeds:
                if do_classifier_free_guidance:
                    single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
                    negative_image_embeds.append(single_negative_image_embeds)
                image_embeds.append(single_image_embeds)

        ip_adapter_image_embeds = []
        for i, single_image_embeds in enumerate(image_embeds):
            single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
            if do_classifier_free_guidance:
                single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
                single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)

            single_image_embeds = single_image_embeds.to(device=device)
            ip_adapter_image_embeds.append(single_image_embeds)

        return ip_adapter_image_embeds

    def decode_latents(self, latents, decode_chunk_size: int = 16):
        latents = 1 / self.vae.config.scaling_factor * latents

        batch_size, channels, num_frames, height, width = latents.shape
        latents = latents.permute(0, 2, 1, 3, 4).reshape(batch_size * num_frames, channels, height, width)

        video = []
        for i in range(0, latents.shape[0], decode_chunk_size):
            batch_latents = latents[i : i + decode_chunk_size]
            batch_latents = self.vae.decode(batch_latents).sample
            video.append(batch_latents)

        video = torch.cat(video)
        video = video[None, :].reshape((batch_size, num_frames, -1) + video.shape[2:]).permute(0, 2, 1, 3, 4)
        # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
        video = video.float()
        return video

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
    def prepare_extra_step_kwargs(self, generator, eta):
        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
        # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
        # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
        # and should be between [0, 1]

        accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
        extra_step_kwargs = {}
        if accepts_eta:
            extra_step_kwargs["eta"] = eta

        # check if the scheduler accepts generator
        accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys())
        if accepts_generator:
            extra_step_kwargs["generator"] = generator
        return extra_step_kwargs

    def check_inputs(
        self,
        prompt,
        height,
        width,
        callback_steps,
        negative_prompt=None,
        prompt_embeds=None,
        negative_prompt_embeds=None,
        ip_adapter_image=None,
        ip_adapter_image_embeds=None,
        callback_on_step_end_tensor_inputs=None,
    ):
        if height % 8 != 0 or width % 8 != 0:
            raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")

        if callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0):
            raise ValueError(
                f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
                f" {type(callback_steps)}."
            )
        if callback_on_step_end_tensor_inputs is not None and not all(
            k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
        ):
            raise ValueError(
                f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found"
                f" {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
            )

        if prompt is not None and prompt_embeds is not None:
            raise ValueError(
                f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
                " only forward one of the two."
            )
        elif prompt is None and prompt_embeds is None:
            raise ValueError(
                "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
            )
        elif prompt is not None and not isinstance(prompt, (str, list, dict)):
            raise ValueError(f"`prompt` has to be of type `str`, `list` or `dict` but is {type(prompt)=}")

        if negative_prompt is not None and negative_prompt_embeds is not None:
            raise ValueError(
                f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
                f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
            )

        if prompt_embeds is not None and negative_prompt_embeds is not None:
            if prompt_embeds.shape != negative_prompt_embeds.shape:
                raise ValueError(
                    "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
                    f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
                    f" {negative_prompt_embeds.shape}."
                )

        if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
            raise ValueError(
                "Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image`"
                " and `ip_adapter_image_embeds` defined."
            )

        if ip_adapter_image_embeds is not None:
            if not isinstance(ip_adapter_image_embeds, list):
                raise ValueError(
                    f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
                )
            elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
                raise ValueError(
                    f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is"
                    f" {ip_adapter_image_embeds[0].ndim}D"
                )

    def prepare_latents(
        self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
    ):
        # If FreeNoise is enabled, generate latents as described in Equation (7) of
        # [FreeNoise](https://arxiv.org/abs/2310.15169)
        if self.free_noise_enabled:
            latents = self._prepare_latents_free_noise(
                batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents
            )

        if isinstance(generator, list) and len(generator) != batch_size:
            raise ValueError(
                f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
                f" size of {batch_size}. Make sure the batch size matches the length of the generators."
            )

        shape = (
            batch_size,
            num_channels_latents,
            num_frames,
            height // self.vae_scale_factor,
            width // self.vae_scale_factor,
        )

        if latents is None:
            latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
        else:
            latents = latents.to(device)

        # scale the initial noise by the standard deviation required by the scheduler
        latents = latents * self.scheduler.init_noise_sigma
        return latents

    @property
    def guidance_scale(self):
        return self._guidance_scale

    @property
    def clip_skip(self):
        return self._clip_skip

    # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
    # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
    # corresponds to doing no classifier free guidance.
    @property
    def do_classifier_free_guidance(self):
        return self._guidance_scale > 1

    @property
    def cross_attention_kwargs(self):
        return self._cross_attention_kwargs

    @property
    def num_timesteps(self):
        return self._num_timesteps

    @property
    def interrupt(self):
        return self._interrupt

    @torch.no_grad()
    @replace_example_docstring(EXAMPLE_DOC_STRING)
    def __call__(
        self,
        prompt: Optional[Union[str, List[str]]] = None,
        height: Optional[int] = None,
        width: Optional[int] = None,
        num_frames: Optional[int] = 16,
        num_inference_steps: int = 50,
        guidance_scale: float = 7.5,
        negative_prompt: Optional[Union[str, List[str]]] = None,
        num_videos_per_prompt: Optional[int] = 1,
        eta: float = 0.0,
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
        latents: Optional[torch.Tensor] = None,
        prompt_embeds: Optional[torch.Tensor] = None,
        negative_prompt_embeds: Optional[torch.Tensor] = None,
        ip_adapter_image: Optional[PipelineImageInput] = None,
        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
        clip_skip: Optional[int] = None,
        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
        callback_on_step_end_tensor_inputs: List[str] = ["latents"],
        decode_chunk_size: int = 16,
        **kwargs,
    ):
        r"""
        The call function to the pipeline for generation.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
            height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                The height in pixels of the generated video.
            width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                The width in pixels of the generated video.
            num_frames (`int`, *optional*, defaults to 16):
                The number of video frames that are generated. Defaults to 16 frames which at 8 frames per second
                amounts to 2 seconds of video.
            num_inference_steps (`int`, *optional*, defaults to 50):
                The number of denoising steps. More denoising steps usually lead to a higher quality video at the
                expense of slower inference.
            guidance_scale (`float`, *optional*, defaults to 7.5):
                A higher guidance scale value encourages the model to generate images closely linked to the text
                `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide what to not include in image generation. If not defined, you need to
                pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
            eta (`float`, *optional*, defaults to 0.0):
                Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
                applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                generation deterministic.
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor is generated by sampling using the supplied random `generator`. Latents should be of shape
                `(batch_size, num_channel, num_frames, height, width)`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                provided, text embeddings are generated from the `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
            ip_adapter_image (`PipelineImageInput`, *optional*):
                Optional image input to work with IP Adapters.
            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
                Pre-generated image embeddings for IP-Adapter. It should be a list whose length equals the number of
                IP-Adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
                provided, embeddings are computed from the `ip_adapter_image` input argument.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generated video. Choose between `torch.Tensor`, `PIL.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return an [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] instead
                of a plain tuple.
            cross_attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
                [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            clip_skip (`int`, *optional*):
                Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                the output of the pre-final layer will be used for computing the prompt embeddings.
            callback_on_step_end (`Callable`, *optional*):
                A function that is called at the end of each denoising step during inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
                `callback_on_step_end_tensor_inputs`.
            callback_on_step_end_tensor_inputs (`List`, *optional*):
                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                `._callback_tensor_inputs` attribute of your pipeline class.
            decode_chunk_size (`int`, defaults to `16`):
                The number of frames to decode at a time when calling `decode_latents` method.

        Examples:

        Returns:
            [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] is
                returned, otherwise a `tuple` is returned where the first element is a list with the generated frames.
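
        A minimal `callback_on_step_end` sketch (illustrative only; the callback may read or modify any tensor
        listed in `callback_on_step_end_tensor_inputs` and must return the kwargs dict):

        ```py
        def on_step_end(pipe, step_index, timestep, callback_kwargs):
            # e.g. inspect the evolving latents, then hand the (possibly updated) tensors back
            print(step_index, callback_kwargs["latents"].shape)
            return callback_kwargs


        output = pipe(prompt="A corgi walking in the park", callback_on_step_end=on_step_end)
        ```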
        """

        callback = kwargs.pop("callback", None)
        callback_steps = kwargs.pop("callback_steps", None)

        if callback is not None:
            deprecate(
                "callback",
                "1.0.0",
                "Passing `callback` as an input argument to `__call__` is deprecated, consider using"
                " `callback_on_step_end`",
            )
        if callback_steps is not None:
            deprecate(
                "callback_steps",
                "1.0.0",
                "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using"
                " `callback_on_step_end`",
            )

        # 0. Default height and width to unet
        height = height or self.unet.config.sample_size * self.vae_scale_factor
        width = width or self.unet.config.sample_size * self.vae_scale_factor

        num_videos_per_prompt = 1

        # 1. Check inputs. Raise error if not correct
        self.check_inputs(
            prompt,
            height,
            width,
            callback_steps,
            negative_prompt,
            prompt_embeds,
            negative_prompt_embeds,
            ip_adapter_image,
            ip_adapter_image_embeds,
            callback_on_step_end_tensor_inputs,
        )

        self._guidance_scale = guidance_scale
        self._clip_skip = clip_skip
        self._cross_attention_kwargs = cross_attention_kwargs
        self._interrupt = False

        # 2. Define call parameters
        if prompt is not None and isinstance(prompt, (str, dict)):
            batch_size = 1
        elif prompt is not None and isinstance(prompt, list):
            batch_size = len(prompt)
        else:
            batch_size = prompt_embeds.shape[0]

        device = self._execution_device

        # 3. Encode input prompt
        text_encoder_lora_scale = (
            self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
        )
        if self.free_noise_enabled:
            prompt_embeds, negative_prompt_embeds = self._encode_prompt_free_noise(
                prompt=prompt,
                num_frames=num_frames,
                device=device,
                num_videos_per_prompt=num_videos_per_prompt,
                do_classifier_free_guidance=self.do_classifier_free_guidance,
                negative_prompt=negative_prompt,
                prompt_embeds=prompt_embeds,
                negative_prompt_embeds=negative_prompt_embeds,
                lora_scale=text_encoder_lora_scale,
                clip_skip=self.clip_skip,
            )
        else:
            prompt_embeds, negative_prompt_embeds = self.encode_prompt(
                prompt,
                device,
                num_videos_per_prompt,
                self.do_classifier_free_guidance,
                negative_prompt,
                prompt_embeds=prompt_embeds,
                negative_prompt_embeds=negative_prompt_embeds,
                lora_scale=text_encoder_lora_scale,
                clip_skip=self.clip_skip,
            )

            # For classifier free guidance, we need to do two forward passes.
            # Here we concatenate the unconditional and text embeddings into a single batch
            # to avoid doing two forward passes
            if self.do_classifier_free_guidance:
                prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])

            prompt_embeds = prompt_embeds.repeat_interleave(repeats=num_frames, dim=0)

        if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
            image_embeds = self.prepare_ip_adapter_image_embeds(
                ip_adapter_image,
                ip_adapter_image_embeds,
                device,
                batch_size * num_videos_per_prompt,
                self.do_classifier_free_guidance,
            )

        # 4. Prepare timesteps
        self.scheduler.set_timesteps(num_inference_steps, device=device)
        timesteps = self.scheduler.timesteps

        # 5. Prepare latent variables
        num_channels_latents = self.unet.config.in_channels
        latents = self.prepare_latents(
            batch_size * num_videos_per_prompt,
            num_channels_latents,
            num_frames,
            height,
            width,
            prompt_embeds.dtype,
            device,
            generator,
            latents,
        )

        # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
        extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)

        # 7. Add image embeds for IP-Adapter
        added_cond_kwargs = (
            {"image_embeds": image_embeds}
            if ip_adapter_image is not None or ip_adapter_image_embeds is not None
            else None
        )

        num_free_init_iters = self._free_init_num_iters if self.free_init_enabled else 1
        for free_init_iter in range(num_free_init_iters):
            if self.free_init_enabled:
                latents, timesteps = self._apply_free_init(
                    latents, free_init_iter, num_inference_steps, device, latents.dtype, generator
                )

            self._num_timesteps = len(timesteps)
            num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order

            # 8. Denoising loop
            with self.progress_bar(total=self._num_timesteps) as progress_bar:
                for i, t in enumerate(timesteps):
                    if self.interrupt:
                        continue

                    # expand the latents if we are doing classifier free guidance
                    latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
                    latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

                    # predict the noise residual
                    noise_pred = self.unet(
                        latent_model_input,
                        t,
                        encoder_hidden_states=prompt_embeds,
                        cross_attention_kwargs=cross_attention_kwargs,
                        added_cond_kwargs=added_cond_kwargs,
                    ).sample

                    # perform guidance
                    if self.do_classifier_free_guidance:
                        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
                        noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

                    # compute the previous noisy sample x_t -> x_t-1
                    latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample

                    if callback_on_step_end is not None:
                        callback_kwargs = {}
                        for k in callback_on_step_end_tensor_inputs:
                            callback_kwargs[k] = locals()[k]
                        callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)

                        latents = callback_outputs.pop("latents", latents)
                        prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                        negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)

                    # call the callback, if provided
                    if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                        progress_bar.update()
                        if callback is not None and i % callback_steps == 0:
                            callback(i, t, latents)

                    if XLA_AVAILABLE:
                        xm.mark_step()

        # 9. Post processing
        if output_type == "latent":
            video = latents
        else:
            video_tensor = self.decode_latents(latents, decode_chunk_size)
            video = self.video_processor.postprocess_video(video=video_tensor, output_type=output_type)

        # 10. Offload all models
        self.maybe_free_model_hooks()

        if not return_dict:
            return (video,)

        return AnimateDiffPipelineOutput(frames=video)