
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
from transformers import CLIPTextModelWithProjection, CLIPTokenizer

from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...models import UVit2DModel, VQModel
from ...schedulers import AmusedScheduler
from ...utils import is_torch_xla_available, replace_example_docstring
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput


if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False

EXAMPLE_DOC_STRING = """
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import AmusedInpaintPipeline
        >>> from diffusers.utils import load_image

        >>> pipe = AmusedInpaintPipeline.from_pretrained(
        ...     "amused/amused-512", variant="fp16", torch_dtype=torch.float16
        ... )
        >>> pipe = pipe.to("cuda")

        >>> prompt = "fall mountains"
        >>> input_image = (
        ...     load_image(
        ...         "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/open_muse/mountains_1.jpg"
        ...     )
        ...     .resize((512, 512))
        ...     .convert("RGB")
        ... )
        >>> mask = (
        ...     load_image(
        ...         "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/open_muse/mountains_1_mask.png"
        ...     )
        ...     .resize((512, 512))
        ...     .convert("L")
        ... )
        >>> pipe(prompt, input_image, mask).images[0].save("out.png")
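        >>> # Optionally (a minimal sketch): seed a `generator` for reproducible output,
        >>> # since the scheduler unmasks tokens stochastically, and tune
        >>> # `num_inference_steps` to trade speed for quality; both are documented
        >>> # arguments of the pipeline call.
        >>> generator = torch.Generator("cuda").manual_seed(0)
        >>> pipe(
        ...     prompt, input_image, mask, num_inference_steps=16, generator=generator
        ... ).images[0].save("out_seeded.png")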
        ```
"""


class AmusedInpaintPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
    _last_supported_version = "0.33.1"

    image_processor: VaeImageProcessor
    vqvae: VQModel
    tokenizer: CLIPTokenizer
    text_encoder: CLIPTextModelWithProjection
    transformer: UVit2DModel
    scheduler: AmusedScheduler

    model_cpu_offload_seq = "text_encoder->transformer->vqvae"

    # `self.vqvae.quantize` reads `quantize.embedding.weight` before the module's forward
    # hook fires, so the weight would still sit on the meta device under CPU offload;
    # keep the VQ-VAE off the offload list instead.
    _exclude_from_cpu_offload = ["vqvae"]

    def __init__(
        self,
        vqvae: VQModel,
        tokenizer: CLIPTokenizer,
        text_encoder: CLIPTextModelWithProjection,
        transformer: UVit2DModel,
        scheduler: AmusedScheduler,
    ):
        super().__init__()

        self.register_modules(
            vqvae=vqvae,
            tokenizer=tokenizer,
            text_encoder=text_encoder,
            transformer=transformer,
            scheduler=scheduler,
        )
        self.vae_scale_factor = (
            2 ** (len(self.vqvae.config.block_out_channels) - 1) if getattr(self, "vqvae", None) else 8
        )
        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_normalize=False)
        self.mask_processor = VaeImageProcessor(
            vae_scale_factor=self.vae_scale_factor,
            do_normalize=False,
            do_binarize=True,
            do_convert_grayscale=True,
            do_resize=True,
        )
        self.scheduler.register_to_config(masking_schedule="linear")

    @torch.no_grad()
    @replace_example_docstring(EXAMPLE_DOC_STRING)
    def __call__(
        self,
        prompt: Optional[Union[List[str], str]] = None,
        image: PipelineImageInput = None,
        mask_image: PipelineImageInput = None,
        strength: float = 1.0,
        num_inference_steps: int = 12,
        guidance_scale: float = 10.0,
        negative_prompt: Optional[Union[str, List[str]]] = None,
        num_images_per_prompt: Optional[int] = 1,
        generator: Optional[torch.Generator] = None,
        prompt_embeds: Optional[torch.Tensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        negative_prompt_embeds: Optional[torch.Tensor] = None,
        negative_encoder_hidden_states: Optional[torch.Tensor] = None,
        output_type="pil",
        return_dict: bool = True,
        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
        callback_steps: int = 1,
        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
        micro_conditioning_aesthetic_score: int = 6,
        micro_conditioning_crop_coord: Tuple[int, int] = (0, 0),
        temperature: Union[int, Tuple[int, int], List[int]] = (2, 0),
    ):
        """
        The call function to the pipeline for generation.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                `Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
                numpy array and pytorch tensor, the expected value range is between `[0, 1]`. If it's a tensor or a
                list of tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or
                a list of arrays, the expected shape should be `(B, H, W, C)` or `(H, W, C)`. It can also accept image
                latents as `image`, but if passing latents directly it is not encoded again.
            mask_image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                `Image`, numpy array or tensor representing an image batch to mask `image`. White pixels in the mask
                are repainted while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
                single channel (luminance) before use. If it's a numpy array or pytorch tensor, it should contain one
                color channel (L) instead of 3, so the expected shape for a pytorch tensor would be `(B, 1, H, W)`,
                `(B, H, W)`, `(1, H, W)`, or `(H, W)`. For a numpy array, it would be `(B, H, W, 1)`, `(B, H, W)`,
                `(H, W, 1)`, or `(H, W)`.
            strength (`float`, *optional*, defaults to 1.0):
                Indicates the extent to transform the reference `image`. Must be between 0 and 1. `image` is used as a
                starting point and more noise is added the higher the `strength`. The number of denoising steps depends
                on the amount of noise initially added. When `strength` is 1, added noise is maximum and the denoising
                process runs for the full number of iterations specified in `num_inference_steps`. A value of 1
                essentially ignores `image`.
            num_inference_steps (`int`, *optional*, defaults to 12):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            guidance_scale (`float`, *optional*, defaults to 10.0):
                A higher guidance scale value encourages the model to generate images closely linked to the text
                `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide what to not include in image generation. If not defined, you need to
                pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                The number of images to generate per prompt.
            generator (`torch.Generator`, *optional*):
                A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                generation deterministic.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                provided, text embeddings are generated from the `prompt` input argument. A single vector from the
                pooled and projected final hidden states.
            encoder_hidden_states (`torch.Tensor`, *optional*):
                Pre-generated penultimate hidden states from the text encoder providing additional text conditioning.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
            negative_encoder_hidden_states (`torch.Tensor`, *optional*):
                Analogous to `encoder_hidden_states` for the positive prompt.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generated image. Choose between `PIL.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return an [`~pipelines.pipeline_utils.ImagePipelineOutput`] instead of a plain
                tuple.
            callback (`Callable`, *optional*):
                A function that is called every `callback_steps` steps during inference. The function is called with
                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
            callback_steps (`int`, *optional*, defaults to 1):
                The frequency at which the `callback` function is called. If not specified, the callback is called at
                every step.
            cross_attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
                [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            micro_conditioning_aesthetic_score (`int`, *optional*, defaults to 6):
                The targeted aesthetic score according to the LAION aesthetic classifier. See
                https://laion.ai/blog/laion-aesthetics/ and the micro-conditioning section of
                https://huggingface.co/papers/2307.01952.
            micro_conditioning_crop_coord (`Tuple[int]`, *optional*, defaults to (0, 0)):
                The targeted height, width crop coordinates. See the micro-conditioning section of
                https://huggingface.co/papers/2307.01952.
            temperature (`Union[int, Tuple[int, int], List[int]]`, *optional*, defaults to (2, 0)):
                Configures the temperature scheduler on `self.scheduler`; see `AmusedScheduler#set_timesteps`.

        Examples:

        Returns:
            [`~pipelines.pipeline_utils.ImagePipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.pipeline_utils.ImagePipelineOutput`] is returned, otherwise a
                `tuple` is returned where the first element is a list with the generated images.
        NzGpass either both `prompt_embeds` and `encoder_hidden_states` or neitherzXpass either both `negatve_prompt_embeds` and `negative_encoder_hidden_states` or neitherz,pass only one of `prompt` or `prompt_embeds`r   pt
max_lengthT)return_tensorspadding
truncationrM   )rD   output_hidden_statesr!         ? )devicedtyper   )rV   rU   )total)micro_condspooled_text_embrA   rG   )model_outputtimestepsampler?   starting_mask_ratioorderlatent)force_not_quantizeshape)?
ValueError
isinstancestrr.   rb   r   model_max_length	input_idsto_execution_devicer   text_embedshidden_statesrepeattorchconcatr   
preprocesstensorrV   	unsqueezeexpandr   set_timestepsint	timestepsr   float16r/   force_upcastfloatencodelatentsquantizereshaper1   r#   boolrU   mask_token_idsumnumelprogress_barrangecatr   chunkstepprev_sampler_   updater-   XLA_AVAILABLExm	mark_stepdecodelatent_channelsr]   clippostprocesshalfmaybe_free_model_hooksr   ).r3   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   output_typerD   rE   rF   rG   rH   rI   rJ   
batch_sizerg   outputsheightwidthrY   start_timestep_idxneeds_upcastingrz   latents_bszchannelslatents_heightlatents_widthmaskr^   r   ir\   model_inputr[   uncond_logitscond_logitsstep_idxoutputs.                                                 r5   __call__zAmusedInpaintPipeline.__call__r   s   X %*?*G!&;&Gfgg".3Q3Y"*/M/Yj  N}4&:LQ^QjKLLfc"XFVJ&,,Q/J"77
 #$>>:: '  i4112  ''	tZ^'_G#//M$+$9$9"$=!%,,-BAF 5 < <=RTUWX YC%-"*')dS[&8Oos3'6&7O NN##'(##~~>> +  )BBt556  ++I4^b+c)0)<)<&181F1Fr1J.%;%B%BCXZ[%\"-K-R-RShjkmn-o*!LL*@-)PQM$)LL2PRg1h$i!$$//6BC( ll-a0-a02 ))'--

 "++A.!((>C;OZU_acd$$%8+tG]G]^!#dnn&>&>"?("JK !9!9:=PP****emm;^

@Q@Q@^@^JJ**##EHH4::3C3CDLbLbH$cdll?F}}<X~}**%%g.q1!4<<[.Zgh""--$"7"77$BWBW9W
 ||DJJqM>=INNPSST[TbTbc--;;"hhj7==?:..!61=%89 $	#\-s4>>3K3K/LM ##>>33A6!C'"'))WIM":K")K#// +$1*?+A  0   "C'1=1C1CA1F.M;#0>[S`E`3a#aL..--!-%"'(; .  +  DNN44599q1uH\H\>\`a>a '')+N0Ba0G#$(K#K 8W= LLNG##$	#L ("FZZ&&#'d333T222JJ%%55	 ' 	 fTT!QZ  ))55fkJF

!##%9"6**{$	# $	#s   4E\>
\>>])NNNrS      g      $@Nr!   NNNNNpilTNr!   N   )r   r   )r   r   )"__name__
__module____qualname___last_supported_versionr   __annotations__r   r   r
   r   r   model_cpu_offload_seq_exclude_from_cpu_offloadr+   rm   no_gradr   EXAMPLE_DOC_STRINGr   r	   r   re   r   rx   rt   	GeneratorTensorr}   r   r   r   r   r   __classcell__)r4   s   @r5   r   r   D   sZ   &&&N-->
 ")	EE !E 2	E
 !E #E< U]]_12 37$()-#% $;?/0/3048<9=AE GK;?239?>D-V+tCy#~./V+ "V+ '	V+
 V+ !V+ V+ "%T#Y"78V+  (}V+ EOO,V+  -V+  (5V+ !) 6V+ )1(>V+  !V+" 8S#u||$<d$BCD#V+$ %V+& !)c3h 8'V+( -0)V+* (-S#X+V+, 3c3hc:;-V+ 3 V+r6   r   )"typingr   r   r   r   r   r   r	   rm   transformersr
   r   r   r   r   modelsr   r   
schedulersr   utilsr   r   pipeline_utilsr   r   r   torch_xla.core.xla_modelcore	xla_modelr   r   r   r    r6   r5   <module>r      s\     E D D  C D * ) F \ \ ))MM @F+35F F+r6   