
    bi                        d dl Z d dlmZmZmZmZmZmZmZ d dl	Z
d dlZd dlmZmZ ddlmZmZ ddlmZmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZmZmZ ddl m!Z! ddl"m#Z#  e       rd dl$m%c m&Z' dZ(ndZ( ejR                  e*      Z+dZ,	 	 	 dde-de.de.de.fdZ/	 	 	 	 ddee-   deee0ejb                  f      deee-      deee.      fdZ2 G d de      Z3y)    N)AnyCallableDictListOptionalTupleUnion)AutoTokenizerGlmModel   )MultiPipelineCallbacksPipelineCallback)PipelineImageInputVaeImageProcessor)AutoencoderKLCogView4Transformer2DModel)DiffusionPipeline)FlowMatchEulerDiscreteScheduler)is_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )CogView4PipelineOutputTFaw  
    Examples:
        ```python
        >>> import torch
        >>> from diffusers import CogView4ControlPipeline

        >>> pipe = CogView4ControlPipeline.from_pretrained("THUDM/CogView4-6B-Control", torch_dtype=torch.bfloat16)
        >>> control_image = load_image(
        ...     "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
        ... )
        >>> prompt = "A bird in space"
        >>> image = pipe(prompt, control_image=control_image, height=1024, width=1024, guidance_scale=3.5).images[0]
        >>> image.save("cogview4-control.png")
        ```
base_seq_len
base_shift	max_shiftreturnc                 &    | |z  dz  }||z  |z   }|S )Ng      ? )image_seq_lenr   r   r   mmus         q/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/pipelines/cogview4/pipeline_cogview4_control.pycalculate_shiftr%   <   s&     
	%#-A	
Y	#BI    num_inference_stepsdevice	timestepssigmasc                 4   dt        t        j                  | j                        j                  j                               v }dt        t        j                  | j                        j                  j                               v }|P|N|s|st        d| j                   d       | j                  d|||d| | j                  }t        |      }||fS |M|K|st        d| j                   d       | j                  d||d| | j                  }t        |      }||fS |M|K|st        d| j                   d       | j                  d||d	| | j                  }t        |      }||fS  | j                  |fd
|i| | j                  }||fS )a  
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
            must be `None`.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        timesteps (`List[int]`, *optional*):
            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
            `num_inference_steps` and `sigmas` must be `None`.
        sigmas (`List[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.

    Returns:
        `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
    r)   r*   zThe current scheduler class z's `set_timesteps` does not support custom timestep or sigma schedules. Please check whether you are using the correct scheduler.)r)   r*   r(   zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r)   r(   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r*   r(   r(   r    )
setinspect	signatureset_timesteps
parameterskeys
ValueError	__class__r)   len)	schedulerr'   r(   r)   r*   kwargsaccepts_timestepsaccepts_sigmass           r$   retrieve_timestepsr9   H   s   > $s7+<+<Y=T=T+U+`+`+e+e+g'hhW%6%6y7N7N%O%Z%Z%_%_%a!bbN!3 .y/B/B.C Dj k  	 	\)F6\U[\''	!)n, )))+ 
	6> .y/B/B.C Da b  	 	M)FMfM''	!)n ))) 
	v1.y/B/B.C D_ `  	 	GvfGG''	!)n ))) 	 	 3MFMfM''	)))r&   c            3           e Zd ZdZg ZdZg dZdedede	de
def
 fd	Z	 	 	 	 d8deeee   f   dedeej&                     deej(                     fdZ	 	 	 	 	 	 	 	 d9deeee   f   deeeee   f      dededeej.                     deej.                     deej&                     deej(                     defdZd:dZ	 	 d;dZ	 	 d<dZed        Zed        Zed        Zed        Z ed         Z!ed!        Z" ejF                          e$e%      d
d
d
d
d
d"d
d
d#dd
d
d
d
d
d$d%dd
d
d&gdfdeeeee   f      deeeee   f      d'e&d(ee   d)ee   d*ed+eee      d,eee'      d-e'ded.eeejP                  eejP                     f      d&eejR                     deejR                     deejR                     d/ee*eef      d0e*eef   d1ed2ed3ee+ee,f      d4eee-eee+gd
f   e.e/f      d5ee   ded6ee0e*f   f.d7              Z1 xZ2S )=CogView4ControlPipelineaR  
    Pipeline for text-to-image generation using CogView4.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

    Args:
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
        text_encoder ([`GLMModel`]):
            Frozen text-encoder. CogView4 uses [glm-4-9b-hf](https://huggingface.co/THUDM/glm-4-9b-hf).
        tokenizer (`PreTrainedTokenizer`):
            Tokenizer of class
            [PreTrainedTokenizer](https://huggingface.co/docs/transformers/main/en/main_classes/tokenizer#transformers.PreTrainedTokenizer).
        transformer ([`CogView4Transformer2DModel`]):
            A text conditioned `CogView4Transformer2DModel` to denoise the encoded image latents.
        scheduler ([`SchedulerMixin`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    ztext_encoder->transformer->vae)latentsprompt_embedsnegative_prompt_embeds	tokenizertext_encodervaetransformerr5   c                 
   t         |           | j                  |||||       t        | dd       r/dt	        | j
                  j                  j                        dz
  z  nd| _        t        | j                        | _
        y )N)r?   r@   rA   rB   r5   rA      r      )vae_scale_factor)super__init__register_modulesgetattrr4   rA   configblock_out_channelsrF   r   image_processor)selfr?   r@   rA   rB   r5   r3   s         r$   rH   z CogView4ControlPipeline.__init__   s~     	lQ\hq 	 	
 W^^bdikoVpc$((//*L*L&MPQ&Q Rvw0$BWBWXr&   N   promptmax_sequence_lengthr(   dtypec                    |xs | j                   }|xs | j                  j                  }t        |t              r|gn|}| j                  |d|ddd      }|j                  }| j                  |dd      j                  }|j                  d   |j                  d   k\  rXt        j                  ||      sB| j
                  j                  |d d |dz
  df         }t        j                  d| d	|        |j                  d   }	d
|	d
z  z
  d
z  }
|
dkD  rit        j                  |j                  d   |
f| j
                  j                  |j                  |j                        }t        j                   ||gd      }| j                  |j#                  |      d      j$                  d   }|j#                  ||      }|S )NlongestTpt)padding
max_length
truncationadd_special_tokensreturn_tensors)rV   rZ   r   zXThe following part of your input was truncated because `max_sequence_length` is set to  z	 tokens:    r   )
fill_valuerR   r(   dim)output_hidden_statesrR   r(   )_execution_devicer@   rR   
isinstancestrr?   	input_idsshapetorchequalbatch_decodeloggerwarningfullpad_token_idr(   cattohidden_states)rN   rP   rQ   r(   rR   text_inputstext_input_idsuntruncated_idsremoved_textcurrent_length
pad_lengthpad_idsr=   s                r$   _get_glm_embedsz'CogView4ControlPipeline._get_glm_embeds   s    14110**00'4&&nn*# % 
 %....SW.Xbb  $(<(<R(@@UcetIu>>66qJ]`aJadfJfGf7ghLNN'(	,A (--a0NR/0B6
>jj%%a(*5>>66$**%,,	G #YY'@aHN)).*;*;F*CZ^)_mmnpq%((uV(Dr&   Tr   negative_promptdo_classifier_free_guidancenum_images_per_promptr=   r>   c
                    |xs | j                   }t        |t              r|gn|}|t        |      }
n|j                  d   }
|| j                  ||	||      }|j                  d      }|j                  d|d      }|j                  |
|z  |d      }|r||xs d}t        |t              r|
|gz  n|}|:t        |      t        |      ur$t        dt        |       dt        |       d      |
t        |      k7  r!t        d| d	t        |       d
| d	|
 d	      | j                  ||	||      }|j                  d      }|j                  d|d      }|j                  |
|z  |d      }||fS )a  
        Encodes the prompt into text encoder hidden states.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            do_classifier_free_guidance (`bool`, *optional*, defaults to `True`):
                Whether to use classifier free guidance or not.
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                Number of images that should be generated per prompt. torch device to place the resulting embeddings on
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
            device: (`torch.device`, *optional*):
                torch device
            dtype: (`torch.dtype`, *optional*):
                torch dtype
            max_sequence_length (`int`, defaults to `1024`):
                Maximum sequence length in encoded prompt. Can be set to other values but may lead to poorer results.
        r   r   r[    z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)rc   rd   re   r4   rg   ry   sizerepeatviewtype	TypeErrorr2   )rN   rP   rz   r{   r|   r=   r>   r(   rR   rQ   
batch_sizeseq_lens               r$   encode_promptz%CogView4ControlPipeline.encode_prompt   s   P 1411'4&&VJ&,,Q/J  009LfV[\M$$Q'%,,Q0EqI%**:8M+MwXZ[&+A+I-3O@J?\_@`jO+<<fuO!d6l$:O&OUVZ[jVkUl mV~Q(  s?33 )/)::J3K_J` ax/
| <33  &*%9%9/K^`fhm%n",11!4G%;%B%B1F[]^%_"%;%@%@NcAcelnp%q"444r&   c	                     ||j                  |      S ||t        |      | j                  z  t        |      | j                  z  f}	t        |t              r)t        |      |k7  rt        dt        |       d| d      t        |	|||      }|S )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)	generatorr(   rR   )rp   intrF   rd   listr4   r2   r   )
rN   r   num_channels_latentsheightwidthrR   r(   r   r<   rg   s
             r$   prepare_latentsz'CogView4ControlPipeline.prepare_latents0  s    ::f%%  K4000J$///	
 i&3y>Z+GA#i.AQ R&<'gi  u	&PUVr&   c
                 R   t        |t        j                        rn| j                  j	                  |||      }|j
                  d   }
|
dk(  r|}n|}|j                  |d|j
                  d   |z        }|j                  ||      }|r|	st        j                  |gdz        }|S )N)r   r   r   r   )r_   output_size)r(   rR   rD   )	rd   rh   TensorrM   
preprocessrg   repeat_interleaverp   ro   )rN   imager   r   r   r|   r(   rR   r{   
guess_modeimage_batch_size	repeat_bys               r$   prepare_imagez%CogView4ControlPipeline.prepare_imageB  s     eU\\*((33E&PU3VE ;;q>q "I .I''	qekkRSnW`F`'ae4&zIIugk*Er&   c           
          |dz  dk7  s|dz  dk7  rt        d| d| d      |Lt         fd|D              s8t        d j                   d|D cg c]  }| j                  vs| c}       ||t        d	| d
| d      ||t        d      |7t        |t              s't        |t
              st        dt        |             ||t        d	| d| d      ||t        d| d| d      |C|@|j                  |j                  k7  r&t        d|j                   d|j                   d      y y y c c}w )Nr\   r   z8`height` and `width` have to be divisible by 16 but are z and r   c              3   :   K   | ]  }|j                   v   y wN)_callback_tensor_inputs).0krN   s     r$   	<genexpr>z7CogView4ControlPipeline.check_inputs.<locals>.<genexpr>q  s#      F
23A---F
s   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z and `negative_prompt_embeds`: z'Cannot forward both `negative_prompt`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` )r2   allr   rd   re   r   r   rg   )	rN   rP   r   r   rz   "callback_on_step_end_tensor_inputsr=   r>   r   s	   `        r$   check_inputsz$CogView4ControlPipeline.check_inputsd  s'    B;!urzQWX^W__dejdkklmnn-9# F
7YF
 C
 DTEaEaDbbn  |^  pHvw  bc  ko  kG  kG  bGpq  pH  oI  J  -";08N}o ^0 0  ^ 5w  FC)@TZ\`IaQRVW]R^Q_`aa"8"D0 9*++]_ 
 &+A+M9/9J K*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8  C *L$5 pHs   E%Ec                     | j                   S r   _guidance_scalerN   s    r$   guidance_scalez&CogView4ControlPipeline.guidance_scale  s    ###r&   c                      | j                   dkD  S )Nr   r   r   s    r$   r{   z3CogView4ControlPipeline.do_classifier_free_guidance  s    ##a''r&   c                     | j                   S r   )_num_timestepsr   s    r$   num_timestepsz%CogView4ControlPipeline.num_timesteps  s    """r&   c                     | j                   S r   )_attention_kwargsr   s    r$   attention_kwargsz(CogView4ControlPipeline.attention_kwargs      %%%r&   c                     | j                   S r   )_current_timestepr   s    r$   current_timestepz(CogView4ControlPipeline.current_timestep  r   r&   c                     | j                   S r   )
_interruptr   s    r$   	interruptz!CogView4ControlPipeline.interrupt  s    r&   2   g      @)r   r   pilr<   control_imager   r   r'   r)   r*   r   r   original_sizecrops_coords_top_leftoutput_typereturn_dictr   callback_on_step_endr   r   c                    t        |t        t        f      r|j                  }|xs- | j                  j
                  j                  | j                  z  }|xs- | j                  j
                  j                  | j                  z  }|xs ||f}||f}| j                  |||||||       |	| _	        || _
        d| _        d| _        |t        |t              rd}n-|t        |t              rt        |      }n|j                   d   }| j"                  }| j%                  ||| j&                  |
||||      \  }}| j                  j
                  j(                  dz  }| j+                  |||||
z  |
|| j,                  j.                        }|j                   dd \  }}d}| j,                  j1                  |      j2                  j5                         }||z
  | j,                  j
                  j6                  z  }| j9                  ||
z  |||t:        j<                  |||      }t;        j>                  |g|j.                  |	      }t;        j>                  |g|j.                  |	      }t;        j>                  |g|j.                  |	      }|jA                  ||
z  d      }|jA                  ||
z  d      }|jA                  ||
z  d      }|| j                  z  || j                  z  z  | j                  j
                  jB                  dz  z  }|5tE        jF                  | jH                  j
                  jJ                  d
|      ntE        jL                  |      }|jO                  tD        jP                        jO                  tD        j<                        }|#|| jH                  j
                  jJ                  z  n|}tS        || jH                  j
                  jU                  dd      | jH                  j
                  jU                  dd      | jH                  j
                  jU                  dd            }tW        | jH                  |||||      \  }}t        |      | _,        | j                  j.                  }t[        t        |      || jH                  j\                  z  z
  d      }| j_                  |      5 } ta        |      D ]  \  }!}"| jb                  r|"| _        t;        jd                  ||gd      jg                  |      }#|"ji                  |j                   d         }$| j	                  |#||$||||d      d   }%| j&                  r2| j	                  |#||$||||d      d   }&|&| jj                  |%|&z
  z  z   }'n|%}'| jH                  jm                  |'|"|d      d   }|qi }(|D ]  })to               |)   |(|)<     || |!| jH                  jp                  |!   |(      }*|*js                  d|      }|*js                  d|      }|*js                  d|      }|!t        |      dz
  k(  s'|!dz   |kD  r/|!dz   | jH                  j\                  z  dk(  r| ju                          tv        sty        jz                           	 ddd       d| _        |dk(  sh|jg                  | j,                  j.                        | j,                  j
                  j6                  z  }| j,                  j}                  |d|      d   }+n|}+| j~                  j                  |+|      }+| j                          |s|+fS t        |+      S # 1 sw Y   xY w)a@  
        Function invoked when calling the pipeline for generation.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            height (`int`, *optional*, defaults to self.transformer.config.sample_size * self.vae_scale_factor):
                The height in pixels of the generated image. If not provided, it is set to 1024.
            width (`int`, *optional*, defaults to self.transformer.config.sample_size * self.vae_scale_factor):
                The width in pixels of the generated image. If not provided it is set to 1024.
            num_inference_steps (`int`, *optional*, defaults to `50`):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            timesteps (`List[int]`, *optional*):
                Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
                in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
                passed will be used. Must be in descending order.
            sigmas (`List[float]`, *optional*):
                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                will be used.
            guidance_scale (`float`, *optional*, defaults to `5.0`):
                Guidance scale as defined in [Classifier-Free Diffusion
                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
                the text `prompt`, usually at the expense of lower image quality.
            num_images_per_prompt (`int`, *optional*, defaults to `1`):
                The number of images to generate per prompt.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor will ge generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
            original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
                If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
                `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
                explained in section 2.2 of
                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
            crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
                `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
                `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
                `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.pipeline_CogView4.CogView4PipelineOutput`] instead of a plain
                tuple.
            attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                `self.processor` in
                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            callback_on_step_end (`Callable`, *optional*):
                A function that calls at the end of each denoising steps during the inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
                `callback_on_step_end_tensor_inputs`.
            callback_on_step_end_tensor_inputs (`List`, *optional*):
                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                `._callback_tensor_inputs` attribute of your pipeline class.
            max_sequence_length (`int`, defaults to `224`):
                Maximum sequence length in encoded prompt. Can be set to other values but may lead to poorer results.
        Examples:

        Returns:
            [`~pipelines.cogview4.pipeline_CogView4.CogView4PipelineOutput`] or `tuple`:
            [`~pipelines.cogview4.pipeline_CogView4.CogView4PipelineOutput`] if `return_dict` is True, otherwise a
            `tuple`. When returning a tuple, the first element is a list with the generated images.
        NFr   r   )r|   r=   r>   rQ   r(   rD   )r   r   r   r   r|   r(   rR   ra   rb   g      ?base_image_seq_len   r         ?r         ?)r#   )totalr^   )rq   encoder_hidden_statestimestepr   target_sizecrop_coordsr   r   )r   r<   r=   r>   latent)r   r   )r   )images)Crd   r   r   tensor_inputsrB   rK   sample_sizerF   r   r   r   r   r   re   r   r4   rg   rc   r   r{   in_channelsr   rA   rR   encodelatent_distsamplescaling_factorr   rh   float32tensorr   
patch_sizenplinspacer5   num_train_timestepsarrayastypeint64r%   getr9   r   maxorderprogress_bar	enumerater   ro   rp   expandr   steplocalsr*   popupdateXLA_AVAILABLExm	mark_stepdecoderM   postprocessmaybe_free_model_hooksr   ),rN   rP   rz   r   r   r   r'   r)   r*   r   r|   r   r<   r=   r>   r   r   r   r   r   r   r   rQ   r   r   r(   latent_channelsvae_shift_factorr!   r#   transformer_dtypenum_warmup_stepsr   itlatent_model_inputr   noise_pred_condnoise_pred_uncond
noise_predcallback_kwargsr   callback_outputsr   s,                                               r$   __call__z CogView4ControlPipeline.__call__  s+   f *-=?U,VW1E1S1S.V4++22>>AVAVVT))00<<t?T?TT%8&%uo 	."	
  .!1!% *VS"9JJvt$<VJ&,,Q/J'' 150B0B,,"7'#9 3 1C 	1
-- **11==B**!$99"7((.. + 
 &++BC06BBIIK&)99TXX__=[=[[&&..MM	
 m_M<O<OX^_llK=8K8KTZ[ %.C-DML_L_hn o%,,Z:O-OQRS!((6K)KQO 5 < <ZJ_=_ab c !D$9$99etG\G\>\]##..1
   KK--AA3H[\)$ 	
 $$RXX.55bjjA	JP.T^^22FFF^dNN!!%%&:C@NN!!%%lD9NN!!%%k48	
 *<NN/Fr*
&	& ")n ,,22s9~0CdnnFZFZ0ZZ\]^%89 6	#\!), 5#1>>)*&%*YY/GQ%O%R%RSd%e" 88GMM!$45"&"2"2"4*7%"/ + 5%5 % #3 	# 	# 33(,(8(8&8.D!)&3$/$9)9$) )9 	) 	)% "3T5H5HO^oLo5p!pJ!0J..--j!WRW-XYZ[ (3&(O? 9-3Xa[*9';D!T^^EZEZ[\E]_n'o$.229gFG$4$8$8-$XM-=-A-ABZ\r-s*I**A9I/IqSTuX\XfXfXlXlNlpqNq '') LLNk5#6	#p "&h&jj0488??3Q3QQGHHOOG)OTUVWEE$$00K0P 	##%8O%U33Q6	# 6	#s   9F,\;'\;;])NrO   NN)NTr   NNNNrO   r   )FF)NN)3__name__
__module____qualname____doc___optional_componentsmodel_cpu_offload_seqr   r
   r   r   r   r   rH   r	   re   r   r   r   rh   r(   rR   ry   boolr   r   r   r   r   propertyr   r{   r   r   r   r   no_gradr   EXAMPLE_DOC_STRINGr   float	GeneratorFloatTensorr   r   r   r   r   r   r   r   __classcell__)r3   s   @r$   r;   r;      sQ   ( <TY Y Y 	Y
 0Y 3Y& )-#')-'+)c49n%) !) &	)
 $)^ <@,0%&049=)-'+#'M5c49n%M5 "%T#Y"78M5 &*	M5
  #M5  -M5 !) 6M5 &M5 $M5 !M5^6 %* R #1f $ $ ( ( # # & & & &   U]]_12 37;?,0 $##%)-(, #%&MQ/359>B3717  59 9B#'3h4sDI~./h4 "%T#Y"78h4 *	h4
 h4 }h4 !h4 DI&h4 e%h4 h4  #h4 E%//43H"HIJh4 %++,h4   1 12h4 !)):): ;h4   c3h0!h4"  %S#X#h4$ %h4& 'h4( #4S>2)h4* '(Cd+T124DF\\]
+h40 -1I1h42 !3h44 
%u,	-5h4 3 h4r&   r;   )r   r   r   )NNNN)4r-   typingr   r   r   r   r   r   r	   numpyr   rh   transformersr
   r   	callbacksr   r   rM   r   r   modelsr   r   pipelines.pipeline_utilsr   
schedulersr   utilsr   r   r   utils.torch_utilsr   pipeline_outputr   torch_xla.core.xla_modelcore	xla_modelr   r   
get_loggerr   rk   r  r   r  r%   re   r(   r9   r;   r    r&   r$   <module>r     s     D D D   0 A D ? 9 9 O O - 3 ))MM			H	% ( 	  	
  *.15%)$(@*!#@* U3,-.@* S	"	@*
 T%[!@*FQ	4/ Q	4r&   