
    bi3                        d dl Z d dlmZmZmZmZmZ d dlZd dl	Z	d dl
mc mZ d dlmZ d dlmZmZmZmZ ddlmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*  e"       rd dl+m,c m-Z. dZ/ndZ/ e#j`                  e1      Z2dZ3d Z4ddZ5d Z6d dZ7d!dZ8	 	 	 	 d"dee9   deee:e	jv                  f      deee9      deee<      fdZ= G d de      Z>y)#    N)CallableDictListOptionalUnion)Image)	BertModelBertTokenizerQwen2TokenizerQwen2VLForConditionalGeneration   )MultiPipelineCallbacksPipelineCallback)VaeImageProcessor)AutoencoderKLMagvitEasyAnimateTransformer3DModel)DiffusionPipeline)FlowMatchEulerDiscreteScheduler)is_torch_xla_availableloggingreplace_example_docstring)randn_tensor)VideoProcessor   )EasyAnimatePipelineOutputTFaY  
    Examples:
        ```python
        >>> import torch
        >>> from diffusers import EasyAnimateControlPipeline
        >>> from diffusers.pipelines.easyanimate.pipeline_easyanimate_control import get_video_to_video_latent
        >>> from diffusers.utils import export_to_video, load_video

        >>> pipe = EasyAnimateControlPipeline.from_pretrained(
        ...     "alibaba-pai/EasyAnimateV5.1-12b-zh-Control-diffusers", torch_dtype=torch.bfloat16
        ... )
        >>> pipe.to("cuda")

        >>> control_video = load_video(
        ...     "https://huggingface.co/alibaba-pai/EasyAnimateV5.1-12b-zh-Control/blob/main/asset/pose.mp4"
        ... )
        >>> prompt = (
        ...     "In this sunlit outdoor garden, a beautiful woman is dressed in a knee-length, sleeveless white dress. "
        ...     "The hem of her dress gently sways with her graceful dance, much like a butterfly fluttering in the breeze. "
        ...     "Sunlight filters through the leaves, casting dappled shadows that highlight her soft features and clear eyes, "
        ...     "making her appear exceptionally elegant. It seems as if every movement she makes speaks of youth and vitality. "
        ...     "As she twirls on the grass, her dress flutters, as if the entire garden is rejoicing in her dance. "
        ...     "The colorful flowers around her sway in the gentle breeze, with roses, chrysanthemums, and lilies each "
        ...     "releasing their fragrances, creating a relaxed and joyful atmosphere."
        ... )
        >>> sample_size = (672, 384)
        >>> num_frames = 49

        >>> input_video, _, _ = get_video_to_video_latent(control_video, num_frames, sample_size)
        >>> video = pipe(
        ...     prompt,
        ...     num_frames=num_frames,
        ...     negative_prompt="Twisted body, limb deformities, text subtitles, comics, stillness, ugliness, errors, garbled text.",
        ...     height=sample_size[0],
        ...     width=sample_size[1],
        ...     control_video=input_video,
        ... ).frames[0]
        >>> export_to_video(video, "output.mp4", fps=8)
        ```
c                    t        | t        j                        rLt        j                  j                  j                  | j                  d      |dd      j                  d      } nt        | t        j                        r/| j                  |d   |d   f      } t        j                  |       } ngt        | t        j                        rBt        j                  |       j                  |d   |d   f      } t        j                  |       } nt        d      t        | t        j                        s7t        j                  |       j!                  ddd      j#                         dz  } | S )	zd
    Preprocess a single image (PIL.Image, numpy.ndarray, or torch.Tensor) to a resized tensor.
    r   bilinearFsizemodealign_cornersr   zKUnsupported input type. Expected PIL.Image, numpy.ndarray, or torch.Tensor.   g     o@)
isinstancetorchTensornn
functionalinterpolate	unsqueezesqueezer   resizenparrayndarray	fromarray
ValueError
from_numpypermutefloat)imagesample_sizes     w/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/pipelines/easyanimate/pipeline_easyanimate_control.pypreprocess_imager7   ]   s    %&##//OOA[zQV 0 

'!* 	 
E5;;	'k!nk!n=>	E2::	&&--{1~{1~.NOfgg eU\\*  '//1a8>>@5HL    c                    | >| D cg c]  }t        ||       } }t        j                  |       d | } | j                  dddd      j	                  d      } |t        ||      }t        j
                  |dk  dd	      }|j	                  d      j	                  d
      j                  g d      j	                  d      }t        j                  |dd| j                         d   ddg      }|j                  | j                  | j                        }n2t        j                  | d d d df         }d	|d d d d d d f<   nd\  } }|1t        ||      }|j                  dddd      j	                  d      }nd }| ||fS c c}w )N)r5   r   r   r"   r   )r   g?           )r   r   r   r"   NN)r7   r$   stackr2   r)   wheretiler   todevicedtype
zeros_like)input_video
num_framesr5   validation_video_mask	ref_imageframeinput_video_masks          r6   get_video_to_video_latentrK   x   s   U`aE';Gaa kk+.{
; "))!Q15??B ,$45JQ\$]!${{+@;+NPSUXY  099!<FFrJRRS_`jjklm$zz*:Q;CSCSCUVWCXZ[]^<_`/22;3E3E{GXGXY$//ArrE0BC(+Q1W%(2%%$Y[A	%%aAq1;;A>		()33= bs   Fc                 $   |}|}| \  }}||z  }|||z  kD  r|}t        t        ||z  |z              }	n|}	t        t        ||z  |z              }t        t        ||z
  dz              }
t        t        ||	z
  dz              }|
|f|
|z   ||	z   ffS )Ng       @)intround)src	tgt_width
tgt_heighttwthhwrresize_heightresize_widthcrop_top	crop_lefts               r6   get_resize_crop_region_for_gridr[      s    	B	BDAq	AABG}5a!,-E"q&1*-.5"},345HE2,345Ii 8m#;Y=U"VVVr8   c                     |j                  t        t        d|j                              d      }| j                  t        t        d| j                              d      }| ||z  z  }||z  d|z
  | z  z   } | S )a  
    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
    Flawed](https://huggingface.co/papers/2305.08891).

    Args:
        noise_cfg (`torch.Tensor`):
            The predicted noise tensor for the guided diffusion process.
        noise_pred_text (`torch.Tensor`):
            The predicted noise tensor for the text-guided diffusion process.
        guidance_rescale (`float`, *optional*, defaults to 0.0):
            A rescale factor applied to the noise predictions.

    Returns:
        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
    r   T)dimkeepdim)stdlistrangendim)	noise_cfgnoise_pred_textguidance_rescalestd_textstd_cfgnoise_pred_rescaleds         r6   rescale_noise_cfgri      s    " ""tE!_5I5I,J'KUY"ZHmmU1inn%= >mMG#x''9: #66!>N:NR[9[[Ir8   c           	         |j                         }|rt        |dd        }d|d<   t        j                  | d d d d ddd d d d f   |dd      }t        |dd        }|d   dz
  |d<   |d   dk7  rFt        j                  | d d d d dd d d d d f   |dd      }t	        j
                  ||gd      }|S |}|S t        |dd        }t        j                  | |dd      }|S )Nr"   r   r   	trilinearFr   r]   )r   r`   Fr(   r$   cat)masklatentprocess_first_frame_onlylatent_sizetarget_sizefirst_frame_resizedremaining_frames_resizedresized_masks           r6   resize_maskrw      s   ++-K;qr?+AmmAqsAq!+UZ
 ;qr?+$Q!+Aq>Q'(}}Q12q!^$;[X]($ !99&9;S%TZ[\L 	 /L  ;qr?+}}T+]bcr8   num_inference_stepsrB   	timestepssigmasc                    ||t        d      |dt        t        j                  | j                        j
                  j                               v }|st        d| j                   d       | j                  d
||d| | j                  }t        |      }||fS |dt        t        j                  | j                        j
                  j                               v }|st        d| j                   d       | j                  d
||d| | j                  }t        |      }||fS  | j                  |fd	|i| | j                  }||fS )a  
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
            must be `None`.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        timesteps (`List[int]`, *optional*):
            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
            `num_inference_steps` and `sigmas` must be `None`.
        sigmas (`List[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.

    Returns:
        `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
    zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesry   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)ry   rB   rz   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)rz   rB   rB    )
r0   setinspect	signatureset_timesteps
parameterskeys	__class__ry   len)	schedulerrx   rB   ry   rz   kwargsaccepts_timestepsaccept_sigmass           r6   retrieve_timestepsr      s   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	 	M)FMfM''	!)n ))) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	 	GvfGG''	!)n ))) 	 	 3MFMfM''	)))r8   c            5       P    e Zd ZdZdZg dZdedeee	f   dee
ef   dedef
 fd	Z	 	 	 	 	 	 	 	 	 	 d:deeee   f   dededeeeee   f      deej*                     deej*                     deej*                     deej*                     deej,                     deej.                     defdZd Z	 	 	 	 	 	 d;dZ	 d<dZd Zed        Zed        Zed        Z ed         Z!ed!        Z" ejF                          e$e%      dd"d#d#dddd$d%dd
d&ddddddd'ddd(gd&dfdeeee   f   d)ee   d*ee   d+ee   d,eejL                     d-eejL                     d.eejL                     d/ee   d0ee'   deeeee   f      dee   d1ee'   d2eeejP                  eejP                     f      d(eej*                     deej*                     deej*                     deej*                     deej*                     d3ee   d4ed5eee)eee*gdf   e+e,f      d6ee   d7e'd8eee      f0d9              Z- xZ.S )=EasyAnimateControlPipelinea  
    Pipeline for text-to-video generation using EasyAnimate.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

    EasyAnimate uses one text encoder [qwen2 vl](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct) in V5.1.

    Args:
        vae ([`AutoencoderKLMagvit`]):
            Variational Auto-Encoder (VAE) Model to encode and decode video to and from latent representations.
        text_encoder (Optional[`~transformers.Qwen2VLForConditionalGeneration`, `~transformers.BertModel`]):
            EasyAnimate uses [qwen2 vl](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct) in V5.1.
        tokenizer (Optional[`~transformers.Qwen2Tokenizer`, `~transformers.BertTokenizer`]):
            A `Qwen2Tokenizer` or `BertTokenizer` to tokenize text.
        transformer ([`EasyAnimateTransformer3DModel`]):
            The EasyAnimate model designed by EasyAnimate Team.
        scheduler ([`FlowMatchEulerDiscreteScheduler`]):
            A scheduler to be used in combination with EasyAnimate to denoise the encoded image latents.
    ztext_encoder->transformer->vae)latentsprompt_embedsnegative_prompt_embedsvaetext_encoder	tokenizertransformerr   c                    t         |           | j                  |||||       t        | dd        | j                  j
                  j                  nd| _        t        | dd       | j                  j                  nd| _	        t        | dd       | j                  j                  nd| _        t        | j                        | _        t        | j                  ddd	      | _        t        | j                        | _        y )
N)r   r   r   r   r   r   Tr         )vae_scale_factorF)r   do_normalizedo_binarizedo_convert_grayscale)super__init__register_modulesgetattrr   configenable_text_attention_maskr   spatial_compression_ratiovae_spatial_compression_ratiotemporal_compression_ratiovae_temporal_compression_ratior   image_processormask_processorr   video_processor)selfr   r   r   r   r   r   s         r6   r   z#EasyAnimateControlPipeline.__init__9  s     	%# 	 	
 t]D1= ##>> 	' 3:$t2L2XDHH..^_ 	* 4;43M3YDHH//_` 	+  1$BdBde/!??!%	
  .t?a?abr8   r   TNpromptnum_images_per_promptdo_classifier_free_guidancenegative_promptr   r   prompt_attention_masknegative_prompt_attention_maskrB   rC   max_sequence_lengthc           	         |
xs | j                   j                  }
|	xs | j                   j                  }	|t        |t              rd}n-|t        |t
              rt        |      }n|j                  d   }|t        |t              rdd|dgdg}n|D cg c]  }dd|dgd }}|D cg c]!  }| j                  j                  |gdd	      # }}| j                  |d
|dddd      }|j                  | j                   j                        }|j                  }|j                  }| j                  r"| j                  ||d      j                  d   }nt        d      |j!                  |d      }|j                  |
|	      }|j                  \  }}}|j!                  d|d      }|j#                  ||z  |d      }|j                  |	      }|r||t        |t              rdd|dgdg}n|D cg c]  }dd|dgd }}|D cg c]!  }| j                  j                  |gdd	      # }}| j                  |d
|dddd      }|j                  | j                   j                        }|j                  }|j                  }| j                  r"| j                  ||d      j                  d   }nt        d      |j!                  |d      }|r]|j                  d   }|j                  |
|	      }|j!                  d|d      }|j#                  ||z  |d      }|j                  |	      }||||fS c c}w c c}w c c}w c c}w )a[  
        Encodes the prompt into text encoder hidden states.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
            device: (`torch.device`):
                torch device
            dtype (`torch.dtype`):
                torch dtype
            num_images_per_prompt (`int`):
                number of images that should be generated per prompt
            do_classifier_free_guidance (`bool`):
                whether to use classifier free guidance or not
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
            prompt_attention_mask (`torch.Tensor`, *optional*):
                Attention mask for the prompt. Required when `prompt_embeds` is passed directly.
            negative_prompt_attention_mask (`torch.Tensor`, *optional*):
                Attention mask for the negative prompt. Required when `negative_prompt_embeds` is passed directly.
            max_sequence_length (`int`, *optional*): maximum sequence length to use for the prompt.
        r   r   usertext)typer   )rolecontentFT)tokenizeadd_generation_prompt
max_lengthrightpt)r   paddingr   
truncationreturn_attention_maskpadding_sidereturn_tensors)	input_idsattention_maskoutput_hidden_stateszLLM needs attention_mask)rC   rB   r<   rB   )r   rC   rB   r#   strr`   r   shaper   apply_chat_templaterA   r   r   r   hidden_statesr0   repeatview)r   r   r   r   r   r   r   r   r   rB   rC   r   
batch_sizemessages_promptmr   text_inputstext_input_idsbs_embedseq_len__negative_prompts                          r6   encode_promptz(EasyAnimateControlPipeline.encode_prompt`  s   Z 0**0034,,33*VS"9JJvt$<VJ&,,Q/J &#& !'-3V$D#E $*
   !'-3W$E#F  nvhi22A3^b2cD  ..$.&*$# ) K &..):):)A)ABK(22N$/$>$>!.. $ 1 1,=Rim !2 !-!$ !!;<<$9$@$@AVXY$Z!%((uV(D,22'1%,,Q0EqI%**86K+KWVXY 5 8 8 8 G '+A+I*z/3/O !'-3_$M#N -<
 ) !'-3=M$N#O  nvhi22A3^b2cD  ..$.&*$# ) K &..):):)A)ABK(22N-8-G-G*..)-):):,#A)- *; *  -	*$& !!;<<-K-R-RShjk-l*&,2215G%;%>%>USY%>%Z"%;%B%B1F[]^%_"%;%@%@NcAcelnp%q"-K-N-NV\-N-]*46KMkkkAXs   L=.&M:M&Mc                 V   dt        t        j                  | j                  j                        j
                  j                               v }i }|r||d<   dt        t        j                  | j                  j                        j
                  j                               v }|r||d<   |S )Neta	generator)r}   r~   r   r   stepr   r   )r   r   r   accepts_etaextra_step_kwargsaccepts_generators         r6   prepare_extra_step_kwargsz4EasyAnimateControlPipeline.prepare_extra_step_kwargs  s     s7#4#4T^^5H5H#I#T#T#Y#Y#[\\'*e$ (3w/@/@ATAT/U/`/`/e/e/g+hh-6k*  r8   c
           
          |dz  dk7  s|dz  dk7  rt        d| d| d      |	Lt         fd|	D              s8t        d j                   d|	D 
cg c]  }
|
 j                  vs|
 c}
       ||t        d	| d
| d      ||t        d      |7t        |t              s't        |t
              st        dt        |             ||t        d      ||t        d| d| d      ||t        d      |C|@|j                  |j                  k7  r&t        d|j                   d|j                   d      y y y c c}
w )N   r   z8`height` and `width` have to be divisible by 16 but are z and .c              3   :   K   | ]  }|j                   v   y wN)_callback_tensor_inputs).0kr   s     r6   	<genexpr>z:EasyAnimateControlPipeline.check_inputs.<locals>.<genexpr>#  s#      F
23A---F
s   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is zEMust provide `prompt_attention_mask` when specifying `prompt_embeds`.z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: zWMust provide `negative_prompt_attention_mask` when specifying `negative_prompt_embeds`.zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` )r0   allr   r#   r   r`   r   r   )r   r   heightwidthr   r   r   r   r   "callback_on_step_end_tensor_inputsr   s   `          r6   check_inputsz'EasyAnimateControlPipeline.check_inputs  s!    B;!urzQWX^W__dejdkklmnn-9# F
7YF
 C
 DTEaEaDbbn  |^  pHvw  bc  ko  kG  kG  bGpq  pH  oI  J  -";08N}o ^0 0  ^ 5w  FC)@TZ\`IaQRVW]R^Q_`aa$)>)Fdee&+A+M9/9J K*++]_ 
 "-2P2Xvww$)?)K""&<&B&BB --:-@-@,A B.445Q8  C *L$7 pHs   E
%E
c
                    |	|	j                  ||      S |||dz
  | j                  z  dz   || j                  z  || j                  z  f}
t        |t              r)t        |      |k7  rt        dt        |       d| d      t        |
|||      }	t        | j                  d      r|	| j                  j                  z  }	|	S )NrB   rC   r   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r   rB   rC   init_noise_sigma)rA   r   r   r#   r`   r   r0   r   hasattrr   r   )r   r   num_channels_latentsrF   r   r   rC   rB   r   r   r   s              r6   prepare_latentsz*EasyAnimateControlPipeline.prepare_latentsJ  s     ::V5:99  !^ C CCaGd888T777
 i&3y>Z+GA#i.AQ R&<'gi 
 u	&PUV4>>#56 ? ??Gr8   c
                    ||j                  ||      }d}
g }t        d|j                  d   |
      D ]I  }||||
z    }| j                  j	                  |      d   }|j                         }|j                  |       K t        j                  |d      }|| j                  j                  j                  z  }||j                  ||      }d}
g }t        d|j                  d   |
      D ]I  }||||
z    }| j                  j	                  |      d   }|j                         }|j                  |       K t        j                  |d      }|| j                  j                  j                  z  }||fS d }||fS )Nr   r   r   rl   )rA   ra   r   r   encoder    appendr$   rn   r   scaling_factor)r   controlcontrol_imager   r   r   rC   rB   r   r   bsnew_controli
control_bsnew_control_pixel_valuescontrol_pixel_values_bscontrol_image_latentss                    r6   prepare_control_latentsz2EasyAnimateControlPipeline.prepare_control_latentsd  s    jjej<GBK1gmmA.3 /$QR0
!XX__Z8;
'__.
"":.	/
 ii3G > >>G$),,F%,HMB')$1m11!4b9 I*7AF*C'*.((//:Q*RST*U'*A*F*F*H'(//0GH	I
 %*II.FA$N!$9DHHOO<Z<Z$Z! --- %)!---r8   c                     | j                   S r   _guidance_scaler   s    r6   guidance_scalez)EasyAnimateControlPipeline.guidance_scale  s    ###r8   c                     | j                   S r   )_guidance_rescaler  s    r6   re   z+EasyAnimateControlPipeline.guidance_rescale  s    %%%r8   c                      | j                   dkD  S )Nr   r   r  s    r6   r   z6EasyAnimateControlPipeline.do_classifier_free_guidance  s    ##a''r8   c                     | j                   S r   )_num_timestepsr  s    r6   num_timestepsz(EasyAnimateControlPipeline.num_timesteps  s    """r8   c                     | j                   S r   )
_interruptr  s    r6   	interruptz$EasyAnimateControlPipeline.interrupt  s    r8   1   i   2   g      @r:   pilr   rF   r   r   control_videocontrol_camera_videorH   rx   r  r   r   output_typereturn_dictcallback_on_step_endr   re   ry   c                 T   t        |t        t        f      r|j                  }t	        |dz  dz        }t	        |dz  dz        }| j                  ||||
|||||	       |	| _        || _        d| _        |t        |t              rd}n-|t        |t              rt        |      }n|j                  d   }| j                  }| j                  | j                  j                  }n| j                   j                  }| j#                  ||||| j$                  |
||||d      \  }}}}t        | j&                  t(              rt+        | j&                  |||d      \  }}nt+        | j&                  |||      \  }}| j&                  j,                  }| j.                  j0                  j2                  }| j5                  ||z  ||||||||	      }|Lt7        ||d	      }|d
z  }| j$                  rt9        j:                  |gdz        n|j=                  ||      }n\||j                  \  }}}} }!| j>                  jA                  |jC                  ddddd      jE                  ||z  || |!      ||      }|j=                  t8        jF                        }|jE                  |||||      jC                  ddddd      }| jI                  d|||||||| j$                  	      d   }| j$                  rt9        j:                  |gdz        n|j=                  ||      }n\t9        jJ                  |      j=                  ||      }| j$                  rt9        j:                  |gdz        n|j=                  ||      }|Z|j                  \  }}}} }!| j>                  jA                  |jC                  ddddd      jE                  ||z  || |!      ||      }|j=                  t8        jF                        }|jE                  |||||      jC                  ddddd      }| jI                  d|||||j                  ||| j$                  	      d   }"t9        jJ                  |      }#|jM                         d   dk7  r|"|#ddddddf<   | j$                  rt9        j:                  |#gdz        n|#j=                  ||      }#t9        j:                  ||#gd      }net9        jJ                  |      }#| j$                  rt9        j:                  |#gdz        n|#j=                  ||      }#t9        j:                  ||#gd      }| jO                  ||      }$| j$                  r.t9        j:                  ||g      }t9        j:                  ||g      }|j=                  |      }|j=                  |      }t        |      || j&                  jP                  z  z
  }%t        |      | _)        | jU                  |      5 }&tW        |      D ]A  \  }'}(| jX                  r| j$                  rt9        j:                  |gdz        n|})t[        | j&                  d      r| j&                  j]                  |)|(      })t9        j^                  |(g|)j                  d   z  |      j=                  |)j                        }*| j!                  |)|*||d      d   }+|+jM                         d   | j.                  j0                  j2                  k7  r|+ja                  dd      \  }+},| j$                  r|+ja                  d      \  }-}.|-|	|.|-z
  z  z   }+| j$                  r|dkD  rtc        |+.|      }+ | j&                  jd                  |+|(|fi |$ddid   }|Zi }/|D ]  }0tg               |0   |/|0<     || |'|(|/      }1|1ji                  d|      }|1ji                  d|      }|1ji                  d|      }|'t        |      dz
  k(  s'|'dz   |%kD  r/|'dz   | j&                  jP                  z  dk(  r|&jk                          tl        s.to        jp                          D 	 ddd       |dk(  s/| js                  |      }2| jt                  jw                  |2|      }2n|}2| jy                          |s|2fS t{        |2      S # 1 sw Y   `xY w)a  
        Generates images or video using the EasyAnimate pipeline based on the provided prompts.

        Examples:
            prompt (`str` or `List[str]`, *optional*):
                Text prompts to guide the image or video generation. If not provided, use `prompt_embeds` instead.
            num_frames (`int`, *optional*):
                Length of the generated video (in frames).
            height (`int`, *optional*):
                Height of the generated image in pixels.
            width (`int`, *optional*):
                Width of the generated image in pixels.
            num_inference_steps (`int`, *optional*, defaults to 50):
                Number of denoising steps during generation. More steps generally yield higher quality images but slow
                down inference.
            guidance_scale (`float`, *optional*, defaults to 5.0):
                Encourages the model to align outputs with prompts. A higher value may decrease image quality.
            negative_prompt (`str` or `List[str]`, *optional*):
                Prompts indicating what to exclude in generation. If not specified, use `negative_prompt_embeds`.
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                Number of images to generate for each prompt.
            eta (`float`, *optional*, defaults to 0.0):
                Applies to DDIM scheduling. Controlled by the eta parameter from the related literature.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                A generator to ensure reproducibility in image generation.
            latents (`torch.Tensor`, *optional*):
                Predefined latent tensors to condition generation.
            prompt_embeds (`torch.Tensor`, *optional*):
                Text embeddings for the prompts. Overrides prompt string inputs for more flexibility.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Embeddings for negative prompts. Overrides string inputs if defined.
            prompt_attention_mask (`torch.Tensor`, *optional*):
                Attention mask for the primary prompt embeddings.
            negative_prompt_attention_mask (`torch.Tensor`, *optional*):
                Attention mask for negative prompt embeddings.
            output_type (`str`, *optional*, defaults to "latent"):
                Format of the generated output, either as a PIL image or as a NumPy array.
            return_dict (`bool`, *optional*, defaults to `True`):
                If `True`, returns a structured output. Otherwise returns a simple tuple.
            callback_on_step_end (`Callable`, *optional*):
                Functions called at the end of each denoising step.
            callback_on_step_end_tensor_inputs (`List[str]`, *optional*):
                Tensor names to be included in callback function calls.
            guidance_rescale (`float`, *optional*, defaults to 0.0):
                Adjusts noise levels based on guidance scale.

        Returns:
            [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned,
                otherwise a `tuple` is returned where the first element is a list with the generated images and the
                second element is a list of `bool`s indicating whether the corresponding generated image contains
                "not-safe-for-work" (nsfw) content.
        r   FNr   r   )r   rB   rC   r   r   r   r   r   r   r   text_encoder_index)muT)rq      r"   r   r   )r   r   )rC   rl   r   )totalscale_model_input)encoder_hidden_statescontrol_latentsr  r:   )re   r  r   r   r   rp   )videor  )frames)>r#   r   r   tensor_inputsrM   r   r  r  r  r   r`   r   r   _execution_devicer   rC   r   r   r   r   r   r   ry   r   r   latent_channelsr   rw   r$   rn   rA   r   
preprocessr2   reshapefloat32r   rD   r   r   orderr  progress_bar	enumerater  r   r  tensorchunkri   r   localspopupdateXLA_AVAILABLExm	mark_stepdecode_latentsr   postprocess_videomaybe_free_model_hooksr   )3r   r   rF   r   r   r  r  rH   rx   r  r   r   r   r   r   r   r   r   r   r  r  r  r   re   ry   r   rB   rC   r   control_video_latentsr  channelsheight_videowidth_videoref_image_latentsref_image_latents_conv_inr   num_warmup_stepsr&  r   tlatent_model_inputt_expand
noise_predr   noise_pred_uncondrd   callback_kwargsr   callback_outputsr  s3                                                      r6   __call__z#EasyAnimateControlPipeline.__call__  s8	   j *-=?U,VW1E1S1S. flb()Ub[B&' 	"!*.
	
  .!1 *VS"9JJvt$<VJ&,,Q/J''(%%++E$$**E "7(,(H(H+'#9"7+I   
	
"!*  dnn&EF-? 3VY1.*I* .@Pcekmv-w*I*NN,,	  $xx>>&&.. 

  +$/0Dghl$m!$9A$=!:>:Z:Z		01A56`ub  &JWJ]J]GJ*lK 00;;%%aAq!4<<+X|[  < M *,,5==,AM)11*j(TZ\abjj1aAM %)$@$@00
% 
%! ;?:Z:Z		01A56`ub  %*$4$4W$=$@$@$O!:>:Z:Z		01A56`ub   JS//GJ*lK,,77!!!Q1a088j9PRZ\hjuv 8 I
 "5==9I!))*j(FTYZbbcdfgijlmopqI $ < <##00
! 
! ).(8(8(A%||~a A%6G)!Q(3 33 		459:.b	 &
 $ii:S(TZ[\O(-(8(8(A% 33 		459:.b	 &
 $ii:S(TZ[\O !::9cJ++!II'=}&MNM$)II/MOd.e$f! &(((7 5 8 8 8 G y>,?$..BVBV,VV!)n%89 3	#\!), 2#1>> BFAaAaUYYy1}%=gn"4>>+>?)-)I)IJ\^_)`& !<<.@.F.Fq.I(IRXY\\,22 ]  "--&*7$3 % .  
 ??$Q'488??+J+JJ$.$4$4QA$4$>MJ 339C9I9I!9L6%!2^YjGj5k!kJ338H38N!2:aq!rJ .$..--j!WmHYmglmnop'3&(O? 9-3Xa[*9';D!Q'X$.229gFG$4$8$8-$XM-=-A-ABZ\r-s*I**A9I/IqSTuX\XfXfXlXlNlpqNq '') LLNe2#3	#l h&''0E((::T_:`EE 	##%8O(66E3	# 3	#s   -H8b'bb')
r   TNNNNNNN   )NNNNNNr   )/__name__
__module____qualname____doc__model_cpu_offload_seqr   r   r   r   r	   r   r
   r   r   r   r   r   rM   boolr   r$   r%   rB   rC   r   r   r   r   r   propertyr  re   r   r	  r  no_gradr   EXAMPLE_DOC_STRINGFloatTensorr3   	Generatorr   r   r   r   rA  __classcell__)r   s   @r6   r   r      sX   * =T$c $c ;YFG$c 67	$c
 3$c 3$cT &',0;?049=8<AE)-'+#&`lc49n%`l  #`l &*	`l
 "%T#Y"78`l  -`l !) 6`l  (5`l )1(>`l &`l $`l !`lF!, #"'++/4n nr4!.F $ $ & & ( ( # #   U]]_12 )-$& #"269=.2-/*-;?/0"MQ*.049=8<AE%*  9B"%)-7B7c49n%B7 SMB7 	B7
 }B7 U../B7 $E$5$56B7 **+B7 &c]B7 !B7 "%T#Y"78B7  (}B7 e_B7 E%//43H"HIJB7 %,,'B7   -!B7" !) 6#B7$  (5%B7& )1(>'B7( c])B7* +B7, '(Cd+T124DF\\]
-B72 -1I3B74  5B76 DI&7B7 3 B7r8   r   r=   )r:   )T)NNNN)?r~   typingr   r   r   r   r   numpyr,   r$   torch.nn.functionalr&   r'   rm   PILr   transformersr	   r
   r   r   	callbacksr   r   r   r   modelsr   r   pipelines.pipeline_utilsr   
schedulersr   utilsr   r   r   utils.torch_utilsr   r   r   pipeline_outputr   torch_xla.core.xla_modelcore	xla_modelr.  r-  
get_loggerrC  loggerrK  r7   rK   r[   ri   rw   rM   r   rB   r3   r   r   r|   r8   r6   <module>r`     s
     8 8       B 0 H 9 9 O O - - 6 ))MM			H	%' T6!4JW&68 *.15%)$(8*!#8* U3,-.8* S	"	8*
 T%[!8*vB7!2 B7r8   