
    bi:              0          d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlZd dlZd dlmc mZ d dlmZmZmZ d dlmZ d dlmZ ddlmZ d	d
lmZ  e       rd dlmZ d dlm Z  e G d d             Z!e G d d             Z" G d d      Z#d Z$d Z%	 	 	 	 d7dejL                  de'dejL                  de(de)de	ejL                     de"fdZ* ejV                         	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d8de	e
e,e-e,   f      de	e'   de	e'   d e'd!e(d"e	e
e,e-e,   f      d#e	e'   de(d$e	e
ej\                  e-ej\                     f      d%e	ejL                     d&e	ejL                     d'e	ejL                     d(e	e,   d)e)d*e	ee'e'ejL                  gdf      d+e'd,e	e/e,ef      d-e(f$d.       Z0	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d9de	e
e,e-e,   f      de	e'   de	e'   d e'd!e(d/e)d0e)d1e)d2e'd3e1d"e	e
e,e-e,   f      d#e	e'   de(d$e	e
ej\                  e-ej\                     f      d%e	ejL                     d&e	ejL                     d'e	ejL                     d(e	e,   d)e)d*e	ee'e'ejL                  gdf      d+e'd,e	e/e,ef      d-e(f.d4Z2 G d5 d6e#      Z3y):    N)	dataclass)AnyCallableOptionalUnion)DDIMSchedulerStableDiffusionPipelineUNet2DConditionModel)rescale_noise_cfg)is_peft_available   )randn_tensor   )convert_state_dict_to_diffusers)
LoraConfig)get_peft_model_state_dictc                   l    e Zd ZU dZej
                  ed<   ej
                  ed<   ej
                  ed<   y)DDPOPipelineOutputa_  
    Output class for the diffusers pipeline to be finetuned with the DDPO trainer

    Args:
        images (`torch.Tensor`):
            The generated images.
        latents (`list[torch.Tensor]`):
            The latents used to generate the images.
        log_probs (`list[torch.Tensor]`):
            The log probabilities of the latents.

    imageslatents	log_probsN__name__
__module____qualname____doc__torchTensor__annotations__     V/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/models/modeling_sd_base.pyr   r   &   s'     LL\\||r!   r   c                   N    e Zd ZU dZej
                  ed<   ej
                  ed<   y)DDPOSchedulerOutputad  
    Output class for the diffusers scheduler to be finetuned with the DDPO trainer

    Args:
        latents (`torch.Tensor`):
            Predicted sample at the previous timestep. Shape: `(batch_size, num_channels, height, width)`
        log_probs (`torch.Tensor`):
            Log probability of the above mentioned sample. Shape: `(batch_size)`
    r   r   Nr   r    r!   r"   r$   r$   :   s     \\||r!   r$   c                       e Zd ZdZdefdZdefdZed        Z	ed        Z
ed        Zed        Zed	        Zed
        Zd Zd Zd Zd Zd Zy)DDPOStableDiffusionPipelinezU
    Main class for the diffusers pipeline to be finetuned with the DDPO trainer
    returnc                     t         NNotImplementedErrorselfargskwargss      r"   __call__z$DDPOStableDiffusionPipeline.__call__O       !!r!   c                     t         r)   r*   r,   s      r"   scheduler_stepz*DDPOStableDiffusionPipeline.scheduler_stepR   r1   r!   c                     t         )z@
        Returns the 2d U-Net model used for diffusion.
        r*   r-   s    r"   unetz DDPOStableDiffusionPipeline.unetU   
    
 "!r!   c                     t         )zq
        Returns the Variational Autoencoder model used from mapping images to and from the latent space
        r*   r5   s    r"   vaezDDPOStableDiffusionPipeline.vae\   r7   r!   c                     t         )zG
        Returns the tokenizer used for tokenizing text inputs
        r*   r5   s    r"   	tokenizerz%DDPOStableDiffusionPipeline.tokenizerc   r7   r!   c                     t         )zc
        Returns the scheduler associated with the pipeline used for the diffusion process
        r*   r5   s    r"   	schedulerz%DDPOStableDiffusionPipeline.schedulerj   r7   r!   c                     t         )zH
        Returns the text encoder used for encoding text inputs
        r*   r5   s    r"   text_encoderz(DDPOStableDiffusionPipeline.text_encoderq   r7   r!   c                     t         )z6
        Returns the autocast context manager
        r*   r5   s    r"   autocastz$DDPOStableDiffusionPipeline.autocastx   r7   r!   c                     t         )z?
        Sets the progress bar config for the pipeline
        r*   r,   s      r"   set_progress_bar_configz3DDPOStableDiffusionPipeline.set_progress_bar_config   
     "!r!   c                     t         )z0
        Saves all of the model weights
        r*   r,   s      r"   save_pretrainedz+DDPOStableDiffusionPipeline.save_pretrained   rD   r!   c                     t         )zB
        Returns the trainable parameters of the pipeline
        r*   r,   s      r"   get_trainable_layersz0DDPOStableDiffusionPipeline.get_trainable_layers   rD   r!   c                     t         )zq
        Light wrapper around accelerate's register_save_state_pre_hook which is run before saving state
        r*   r,   s      r"   save_checkpointz+DDPOStableDiffusionPipeline.save_checkpoint   rD   r!   c                     t         )zq
        Light wrapper around accelerate's register_lad_state_pre_hook which is run before loading state
        r*   r,   s      r"   load_checkpointz+DDPOStableDiffusionPipeline.load_checkpoint   rD   r!   N)r   r   r   r   r   r0   r$   r3   propertyr6   r9   r;   r=   r?   rA   rC   rF   rH   rJ   rL   r    r!   r"   r&   r&   J   s    "+= ""1D " " " " " " " " " " " " """"""r!   r&   c                     | j                   }|t        |      kD  rt        d      | j                  | j                  dt        |      |z
  z  z         j                  |      S )a  
    As opposed to the default direction of broadcasting (right to left), this function broadcasts from left to right

        Args:
            input_tensor (`torch.FloatTensor`): is the tensor to broadcast
            shape (`tuple[int]`): is the shape to broadcast to
    zrThe number of dimensions of the tensor to broadcast cannot be greater than the length of the shape to broadcast to)r   )ndimlen
ValueErrorreshapeshapebroadcast_to)input_tensorrS   
input_ndims      r"   _left_broadcastrW      sf     ""JCJ A
 	
  2 2TSZ*=T5U UVccdijjr!   c                    t        j                  | j                  d|j                               j	                  |j
                        }t        j                  |j                         dk\  | j                  j                  d|j                               | j                        j	                  |j
                        }d|z
  }d|z
  }||z  d||z  z
  z  }|S )Nr   r   )r   gatheralphas_cumprodcputodevicewherefinal_alpha_cumprod)r-   timestepprev_timestepalpha_prod_talpha_prod_t_prevbeta_prod_tbeta_prod_t_prevvariances           r"   _get_variancerg      s    << 3 3QGJJ8??[Lq ""1m&7&7&9:   	b	 
 l"K,, ;.1|FW7W3WXHOr!   model_outputr`   sampleetause_clipped_model_outputprev_sampler'   c           	      *   | j                   t        d      || j                  j                  | j                   z  z
  }t	        j
                  |d| j                  j                  dz
        }| j                  j                  d|j                               }	t	        j                  |j                         dk\  | j                  j                  d|j                               | j                        }
t        |	|j                        j                  |j                        }	t        |
|j                        j                  |j                        }
d|	z
  }| j                  j                  dk(  r||dz  |z  z
  |	dz  z  }|}n| j                  j                  dk(  r|}||	dz  |z  z
  |dz  z  }n_| j                  j                  dk(  r#|	dz  |z  |dz  |z  z
  }|	dz  |z  |dz  |z  z   }n#t        d| j                  j                   d	      | j                  j                   r| j#                  |      }nQ| j                  j$                  r;|j                  | j                  j&                   | j                  j&                        }t)        | ||      }||dz  z  }t        ||j                        j                  |j                        }|r||	dz  |z  z
  |dz  z  }d|
z
  |d
z  z
  dz  |z  }|
dz  |z  |z   }||t        d      |5t+        |j                  ||j                  |j,                        }|||z  z   }|j/                         |z
  d
z   d
|d
z  z  z  t	        j0                  |      z
  t	        j0                  t	        j2                  d
t	        j4                  t6        j8                        z              z
  }|j;                  t=        t?        d|j@                                    }tC        |jE                  |j,                        |      S )a  

    Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion process
    from the learned model outputs (most often the predicted noise).

    Args:
        model_output (`torch.FloatTensor`): direct output from learned diffusion model.
        timestep (`int`): current discrete timestep in the diffusion chain.
        sample (`torch.FloatTensor`):
            current instance of sample being created by diffusion process.
        eta (`float`): weight of noise for added noise in diffusion step.
        use_clipped_model_output (`bool`): if `True`, compute "corrected" `model_output` from the clipped
            predicted original sample. Necessary because predicted original sample is clipped to [-1, 1] when
            `self.config.clip_sample` is `True`. If no clipping has happened, "corrected" `model_output` would coincide
            with the one provided as input and `use_clipped_model_output` will have not effect.
        generator: random number generator.
        variance_noise (`torch.FloatTensor`): instead of generating noise for the variance using `generator`, we
            can directly provide the noise for the variance itself. This is useful for methods such as CycleDiffusion.
            (https://huggingface.co/papers/2210.05559)

    Returns:
        `DDPOSchedulerOutput`: the predicted sample at the previous timestep and the log probability of the sample
    zaNumber of inference steps is 'None', you need to run 'set_timesteps' after creating the schedulerr   r   epsilong      ?ri   v_predictionzprediction_type given as z6 must be one of `epsilon`, `sample`, or `v_prediction`r   zsCannot pass both generator and prev_sample. Please make sure that either `generator` or `prev_sample` stays `None`.)	generatorr]   dtype)dim)#num_inference_stepsrQ   confignum_train_timestepsr   clamprZ   rY   r[   r^   r_   rW   rS   r\   r]   prediction_typethresholding_threshold_sampleclip_sampleclip_sample_rangerg   r   rq   detachlogsqrt	as_tensornppimeantuplerangerO   r$   type)r-   rh   r`   ri   rj   rk   rp   rl   ra   rb   rc   rd   pred_original_samplepred_epsilonrf   	std_dev_tpred_sample_directionprev_sample_meanvariance_noiselog_probs                       r"   r3   r3      s   D 'o
 	
 t{{>>$BZBZZZMKKq$++2Q2QTU2UVM &&--a@Lq ""1m&7&7&9:  
 #<>AA&--PL'(96<<HKKFMMZl"K {{""i/ &)=)L LP\adPee#		$	$	0+#!69M!MMQ\adQee		$	$	6 ,c 1V;{C?OS_>__$c)\9[#=MQW<WW'(C(C'D E 
 	
 {{#556JK		 	 399[[***DKK,I,I 
 T8];Hh3''I	6<<8;;FMMJI#!69M!MMQ\adQee !22Y\AsKlZ )S14HHK``9#8+
 	

 %&&$$	
 '^)CC "22q
89Q)Q,=OP
))I
	
))EJJq5??255#99:
;	<  }}uQ'>!?}@H{//=xHHr!   promptheightwidthrs   guidance_scalenegative_promptnum_images_per_promptrp   r   prompt_embedsnegative_prompt_embedsoutput_typereturn_dictcallbackcallback_stepscross_attention_kwargsguidance_rescalec           
         |xs- | j                   j                  j                  | j                  z  }|xs- | j                   j                  j                  | j                  z  }| j	                  |||||||       |t        |t              rd}n-|t        |t              rt        |      }n|j                  d   }| j                  }|dkD  }||j                  dd      nd}| j                  ||||||||      }| j                  j                  ||       | j                  j                  }| j                   j                  j                   }| j#                  ||z  ||||j$                  ||	|
      }
t        |      || j                  j&                  z  z
  }|
g}g }| j)                  |      5 }t+        |      D ]D  \  }}|rt-        j.                  |
gd	z        n|
}| j                  j1                  ||      }| j                  ||||d
      d   } |r| j3                  d	      \  }!}"|!||"|!z
  z  z   } |r|dkD  rt5        | "|      } t7        | j                  | ||
|      }#|#j8                  }
|#j:                  }$|j=                  |
       |j=                  |$       |t        |      dz
  k(  s*|dz   |kD  s|dz   | j                  j&                  z  dk(  s|j?                          |1||z  dk(  s; ||||
       G 	 ddd       |dk(  sb| j@                  jC                  |
| j@                  j                  jD                  z  d
      d   }%| jG                  |%||j$                        \  }%}&n|
}%d}&|&dg|%j                  d   z  }'n|&D (cg c]  }(|(  }'}(| jH                  jK                  |%||'      }%tM        | d      r&| jN                  | jN                  jQ                          tS        |%||      S # 1 sw Y   xY wc c}(w )u  
    Function invoked when calling the pipeline for generation. Args: prompt (`str` or `list[str]`, *optional*): The
    prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. instead. height
    (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): The height in pixels of the
    generated image.
        width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
            The width in pixels of the generated image.
        num_inference_steps (`int`, *optional*, defaults to 50):
            The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense
            of slower inference.
        guidance_scale (`float`, *optional*, defaults to 7.5):
            Guidance scale as defined in [Classifier-Free Diffusion
            Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. of
            [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
            `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to the
            text `prompt`, usually at the expense of lower image quality.
        negative_prompt (`str` or `list[str]`, *optional*):
            The prompt or prompts not to guide the image generation. If not defined, one has to pass
            `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
            less than `1`).
        num_images_per_prompt (`int`, *optional*, defaults to 1):
            The number of images to generate per prompt.
        eta (`float`, *optional*, defaults to 0.0):
            Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only applies
            to [`schedulers.DDIMScheduler`], will be ignored for others.
        generator (`torch.Generator` or `list[torch.Generator]`, *optional*):
            One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) to
            make generation deterministic.
        latents (`torch.FloatTensor`, *optional*):
            Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
            generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
            tensor will ge generated by sampling using the supplied random `generator`.
        prompt_embeds (`torch.FloatTensor`, *optional*):
            Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
            provided, text embeddings will be generated from `prompt` input argument.
        negative_prompt_embeds (`torch.FloatTensor`, *optional*):
            Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
            If not provided, negative_prompt_embeds will be generated from `negative_prompt` input argument.
        output_type (`str`, *optional*, defaults to `"pil"`):
            The output format of the generate image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/):
            `PIL.Image.Image` or `np.array`.
        return_dict (`bool`, *optional*, defaults to `True`):
            Whether to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a plain tuple.
        callback (`Callable`, *optional*):
            A function that will be called every `callback_steps` steps during inference. The function will be called
            with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
        callback_steps (`int`, *optional*, defaults to 1):
            The frequency at which the `callback` function will be called. If not specified, the callback will be
            called at every step.
        cross_attention_kwargs (`dict`, *optional*):
            A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
            `self.processor` in
            [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
        guidance_rescale (`float`, *optional*, defaults to 0.7):
            Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
            Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
            [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891).
            Guidance rescale factor should fix overexposure when using zero terminal SNR.

    Examples:

    Returns:
        `DDPOPipelineOutput`: The generated image, the predicted latents used to generate the image and the associated
        log probabilities
    Nr   r         ?scaler   r   
lora_scaler]   totalr   Fencoder_hidden_statesr   r           r   latentr   Tr   do_denormalizefinal_offload_hook)*r6   rt   sample_sizevae_scale_factorcheck_inputs
isinstancestrlistrP   rS   _execution_deviceget_encode_promptr=   set_timesteps	timestepsin_channelsprepare_latentsrq   orderprogress_bar	enumerater   catscale_model_inputchunkr   r3   r   r   appendupdater9   decodescaling_factorrun_safety_checkerimage_processorpostprocesshasattrr   offloadr   ))r-   r   r   r   rs   r   r   r   rj   rp   r   r   r   r   r   r   r   r   r   
batch_sizer]   do_classifier_free_guidancetext_encoder_lora_scaler   num_channels_latentsnum_warmup_stepsall_latentsall_log_probsr   itlatent_model_input
noise_prednoise_pred_uncondnoise_pred_textscheduler_outputr   imagehas_nsfw_conceptr   has_nsfws)                                            r"   pipeline_stepr   E  sL   p Ktyy''33d6K6KKFITYY%%11D4I4IIE 	 j5
		
64 8[
"((+
##F #13"6 LbKm488$Gsw''##5* ( 	M 	NN  !4V D((I  99++77""**	G 9~(;dnn>R>R(RR)KM			!4		5 $,i( #	,DAq=XG9q=!9^e!%!A!ABTVW!X "&3'=! #  J +5?5E5Ea5H2!?.?UfCf1gg
*/?#/E.z?]mn
  .dnnj!WVYZ&..G'11Hw'  * C	NQ&&AE5E+E1q5TXTbTbThThJhlmJm##%'A,>!,CQ7+G#	,$,L ("$((//*H*H HV[\]^_"&"9"9%I\I\"]%++a.07GH8h,HH  ,,U\j,kE t)*t/F/F/R'')e[-@@q$, $,` Is+   0DO;8O;O;-O;7O;
P;Ptruncated_backproptruncated_backprop_randgradient_checkpointtruncated_backprop_timesteptruncated_rand_backprop_minmaxc                 \	   |xs- | j                   j                  j                  | j                  z  }|xs- | j                   j                  j                  | j                  z  }t	        j
                         5  | j                  |||||||       |t        |t              rd}n-|t        |t              rt        |      }n|j                  d   }| j                  }|dkD  }||j                  dd      nd}| j                  ||||||||      }| j                  j!                  ||       | j                  j"                  }| j                   j                  j$                  }| j'                  ||z  ||||j(                  |||      }ddd       t              || j                  j*                  z  z
  }|g}g } | j-                  |      5 }!t/        |      D ]  \  }"}#rt	        j0                  |gd	z        n|}$| j                  j3                  |$|#      }$|r)t5        j4                  | j                   |$|#||d
      d   }%n| j                  |$|#||d
      d   }%|rI|r2t7        j8                  |
d   |
d         }&|"|&k  r&|%j;                         }%n|"|	k  r|%j;                         }%|r|%j=                  d	      \  }'}(|'||(|'z
  z  z   }%|r|dkD  rt?        |%(|      }%tA        | j                  |%|#||      })|)jB                  }|)jD                  }*|jG                  |       | jG                  |*       |"t        |      dz
  k(  s+|"dz   |kD  ss|"dz   | j                  j*                  z  dk(  s|!jI                          ||"|z  dk(  s ||"|#|        	 ddd       |dk(  sb| jJ                  jM                  || jJ                  j                  jN                  z  d
      d   }+| jQ                  |+|j(                        \  }+},n|}+d},|,dg|+j                  d   z  }-n|,D .cg c]  }.|.  }-}.| jR                  jU                  |+||-      }+tW        | d      r&| jX                  | jX                  j[                          t]        |+||       S # 1 sw Y   xY w# 1 sw Y   xY wc c}.w )u  
    Function to get RGB image with gradients attached to the model weights.

    Args:
        prompt (`str` or `list[str]`, *optional*, defaults to `None`):
            The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
            instead.
        height (`int`, *optional*, defaults to `pipeline.unet.config.sample_size * pipeline.vae_scale_factor`):
            The height in pixels of the generated image.
        width (`int`, *optional*, defaults to `pipeline.unet.config.sample_size * pipeline.vae_scale_factor`):
            The width in pixels of the generated image.
        num_inference_steps (`int`, *optional*, defaults to `50`):
            The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense
            of slower inference.
        guidance_scale (`float`, *optional*, defaults to `7.5`):
            Guidance scale as defined in [Classifier-Free Diffusion
            Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. of
            [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
            `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to the
            text `prompt`, usually at the expense of lower image quality.
        truncated_backprop (`bool`, *optional*, defaults to True):
            Truncated Backpropation to fixed timesteps, helps prevent collapse during diffusion reward training as
            shown in AlignProp (https://huggingface.co/papers/2310.03739).
        truncated_backprop_rand (`bool`, *optional*, defaults to True):
            Truncated Randomized Backpropation randomizes truncation to different diffusion timesteps, this helps
            prevent collapse during diffusion reward training as shown in AlignProp
            (https://huggingface.co/papers/2310.03739). Enabling truncated_backprop_rand allows adapting earlier
            timesteps in diffusion while not resulting in a collapse.
        gradient_checkpoint (`bool`, *optional*, defaults to True):
            Adds gradient checkpointing to Unet forward pass. Reduces GPU memory consumption while slightly increasing
            the training time.
        truncated_backprop_timestep (`int`, *optional*, defaults to 49):
            Absolute timestep to which the gradients are being backpropagated. Higher number reduces the memory usage
            and reduces the chances of collapse. While a lower value, allows more semantic changes in the diffusion
            generations, as the earlier diffusion timesteps are getting updated. However it also increases the chances
            of collapse.
        truncated_rand_backprop_minmax (`Tuple`, *optional*, defaults to (0,50)):
            Range for randomized backprop. Here the value at 0 index indicates the earlier diffusion timestep to update
            (closer to noise), while the value at index 1 indicates the later diffusion timestep to update.
        negative_prompt (`str` or `list[str]`, *optional*):
            The prompt or prompts not to guide the image generation. If not defined, one has to pass
            `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
            less than `1`).
        num_images_per_prompt (`int`, *optional*, defaults to 1):
            The number of images to generate per prompt.
        eta (`float`, *optional*, defaults to 0.0):
            Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only applies
            to [`schedulers.DDIMScheduler`], will be ignored for others.
        generator (`torch.Generator` or `list[torch.Generator]`, *optional*):
            One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) to
            make generation deterministic.
        latents (`torch.FloatTensor`, *optional*):
            Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
            generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
            tensor will ge generated by sampling using the supplied random `generator`.
        prompt_embeds (`torch.FloatTensor`, *optional*):
            Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
            provided, text embeddings will be generated from `prompt` input argument.
        negative_prompt_embeds (`torch.FloatTensor`, *optional*):
            Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
            If not provided, negative_prompt_embeds will be generated from `negative_prompt` input argument.
        output_type (`str`, *optional*, defaults to `"pil"`):
            The output format of the generate image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/):
            `PIL.Image.Image` or `np.array`.
        return_dict (`bool`, *optional*, defaults to `True`):
            Whether to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a plain tuple.
        callback (`Callable`, *optional*):
            A function that will be called every `callback_steps` steps during inference. The function will be called
            with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
        callback_steps (`int`, *optional*, defaults to 1):
            The frequency at which the `callback` function will be called. If not specified, the callback will be
            called at every step.
        cross_attention_kwargs (`dict`, *optional*):
            A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
            `pipeline.processor` in
            [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
        guidance_rescale (`float`, *optional*, defaults to 0.7):
            Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
            Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
            [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891).
            Guidance rescale factor should fix overexposure when using zero terminal SNR.

    Examples:

    Returns:
        `DDPOPipelineOutput`: The generated image, the predicted latents used to generate the image and the associated
        log probabilities
    Nr   r   r   r   r   r   r   r   F)r   use_reentrantr   r   r   r   r   Tr   r   )/r6   rt   r   r   r   no_gradr   r   r   r   rP   rS   r   r   r   r=   r   r   r   r   rq   r   r   r   r   r   
checkpointrandomrandintr|   r   r   r3   r   r   r   r   r9   r   r   r   r   r   r   r   r   r   )/pipeliner   r   r   rs   r   r   r   r   r   r   r   r   rj   rp   r   r   r   r   r   r   r   r   r   r   r]   r   r   r   r   r   r   r   r   r   r   r   r   rand_timestepr   r   r   r   r   r   r   r   s/                                                  r"   pipeline_step_with_gradr     s   f Sx}}++77(:S:SSFQX]]))558Q8QQE	 8
"	
 *VS"9JJvt$<VJ&,,Q/J++ '5s&:# :P9["&&w5ae 	  !//!''#9. 0 	
 	(()<V(L&&00	  (}}33??**.. 	
_8
t 9~(;h>P>P>V>V(VV)KM			%8		9 >,\i( =	,DAq=XG9q=!9^e!)!3!3!E!EFXZ[!\ #'22MM&!+A"' 
 &]]&*7+A % +  
 " +$*NN6q9;YZ[;\%M =(%/%6%6%8
 66%/%6%6%8
 +5?5E5Ea5H2!?.?UfCf1gg
*/?#/E.z?]mn
  .h.@.@*aQXZ]^&..G'11Hw'  * C	NQ&&AE5E+E1q5T\TfTfTlTlJlpqJq##%'A,>!,CQ7+{=	,>,@ ("##Ghll.A.A.P.P$P^c#defg"*"="=eV]M`M`"a%++a.07GH8h,HH$$00K`n0oE x-.83N3N3Z##++-e[-@@_8
 8
z>, >,T Is8   7DRE=RR-RRR&
R)RR&c                       e Zd ZddddededefdZdefd	Zdefd
Zde	fdZ
ed        Zed        Zed        Zed        Zed        Zed        Zd Zd Zd Zd Zd Zy)"DefaultDDPOStableDiffusionPipelinemainT)pretrained_model_revisionuse_lorapretrained_model_namer   r   c                   t        j                  ||      | _        || _        || _        || _        	 | j                  j                  |d|       d| _        t        j                  | j                  j                  j                        | j                  _        d | j                  _        | j                  j                   j#                  d       | j                  j$                  j#                  d       | j                  j&                  j#                  | j                          y # t        $ r |rt        j                  dt               Y w xY w)N)revision pytorch_lora_weights.safetensors)weight_namer   TzTrying to load LoRA weights but no LoRA weights found. Set `use_lora=False` or check that `pytorch_lora_weights.safetensors` exists in the model folder.F)r	   from_pretrainedsd_pipeliner   pretrained_modelpretrained_revisionload_lora_weightsOSErrorwarningswarnUserWarningr   from_configr=   rt   safety_checkerr9   requires_grad_r?   r6   )r-   r   r   r   s       r"   __init__z+DefaultDDPOStableDiffusionPipeline.__init__  s    2BB!,E
 ! 5#< 	..%>2 / 
 !DM &3%>%>t?O?O?Y?Y?`?`%a"*.' 	++E2%%44U;,,->?  	U	s   %D& &%EEr'   c                 4    t        | j                  g|i |S r)   )r   r   r,   s      r"   r0   z+DefaultDDPOStableDiffusionPipeline.__call__?  s    T--????r!   c                 4    t        | j                  g|i |S r)   )r   r   r,   s      r"   rgb_with_gradz0DefaultDDPOStableDiffusionPipeline.rgb_with_gradB  s    &t'7'7I$I&IIr!   c                 H    t        | j                  j                  g|i |S r)   )r3   r   r=   r,   s      r"   r3   z1DefaultDDPOStableDiffusionPipeline.scheduler_stepE  s#    d..88J4J6JJr!   c                 .    | j                   j                  S r)   )r   r6   r5   s    r"   r6   z'DefaultDDPOStableDiffusionPipeline.unetH  s    $$$r!   c                 .    | j                   j                  S r)   )r   r9   r5   s    r"   r9   z&DefaultDDPOStableDiffusionPipeline.vaeL  s    ###r!   c                 .    | j                   j                  S r)   )r   r;   r5   s    r"   r;   z,DefaultDDPOStableDiffusionPipeline.tokenizerP      )))r!   c                 .    | j                   j                  S r)   )r   r=   r5   s    r"   r=   z,DefaultDDPOStableDiffusionPipeline.schedulerT  r  r!   c                 .    | j                   j                  S r)   )r   r?   r5   s    r"   r?   z/DefaultDDPOStableDiffusionPipeline.text_encoderX  s    ,,,r!   c                 >    | j                   rt        j                  S d S r)   )r   
contextlibnullcontextr5   s    r"   rA   z+DefaultDDPOStableDiffusionPipeline.autocast\  s    )-z%%@D@r!   c                     | j                   rEt        t        | j                  j                              }| j                  j                  ||       | j                  j                  |       y )Nsave_directoryunet_lora_layers)r   r   r   r   r6   save_lora_weightsrF   )r-   
output_dir
state_dicts      r"   rF   z2DefaultDDPOStableDiffusionPipeline.save_pretrained`  sT    ==89RSWScScShSh9ijJ..j[e.f((4r!   c                 <     | j                   j                  |i | y r)   )r   rC   r,   s      r"   rC   z:DefaultDDPOStableDiffusionPipeline.set_progress_bar_configf  s    000$A&Ar!   c                    | j                   rt        dddg d      }| j                  j                  j	                  |       | j                  j                  j                         D ]3  }|j                  s|j                  t        j                        |_
        5 | j                  j                  S | j                  j                  S )N   gaussian)to_kto_qto_vzto_out.0)r
lora_alphainit_lora_weightstarget_modules)r   r   r   r6   add_adapter
parametersrequires_gradr\   r   float32data)r-   lora_configparams      r"   rH   z7DefaultDDPOStableDiffusionPipeline.get_trainable_layersi  s    ==$",C	K !!--k: ))..99; 9&&!&%--!8EJ9 ##(((##(((r!   c                    t        |      dk7  rt        d      | j                  rTt        |d   d      rEt	        |d   dd       5t        t        |d               }| j                  j                  ||       y | j                  sFt        |d   t              r3|d   j                  t        j                  j                  |d             y t        dt        |d                )Nr   CGiven how the trainable params were set, this should be of length 1r   peft_configr  r6   Unknown model type )rP   rQ   r   r   getattrr   r   r   r  r   r
   rF   ospathjoinr   )r-   modelsweightsr  r  s        r"   rJ   z2DefaultDDPOStableDiffusionPipeline.save_checkpoint|  s    v;!bcc==WVAY>76RS9VceiCjCv89RSYZ[S\9]^J..j[e.f:fQi9M#N1I%%bggll:v&FG24q	?2CDEEr!   c                    t        |      dk7  rt        d      | j                  rB| j                  j	                  |d      \  }}| j                  j                  |||d          y | j                  smt        |d   t              rZt        j                  |d      } |d   j                  d
i |j                   |d   j                  |j                                ~y t        d	t        |d                )Nr   r&  r   )r   r   )network_alphasr6   r6   )	subfolderr(  r    )rP   rQ   r   r   lora_state_dictload_lora_into_unetr   r
   r   register_to_configrt   load_state_dictr  r   )r-   r-  	input_dirr2  r0  
load_models         r"   rL   z2DefaultDDPOStableDiffusionPipeline.load_checkpoint  s    v;!bcc==.2.>.>.N.N'I /O /+O^ 00Q_flmnfo0p:fQi9M#N-==iSYZJ(F1I((=:+<+<=1I%%j&;&;&=>24q	?2CDEEr!   N)r   r   r   r   boolr   r   r0   r  r$   r3   rM   r6   r9   r;   r=   r?   rA   rF   rC   rH   rJ   rL   r    r!   r"   r   r     s    W]pt @c @QT @im @@@+= @J0B JK1D K % % $ $ * * * * - - A A5B)&	FFr!   r   )r   FNN)NNN2         @Nr   r   NNNNpilTNr   Nr   )NNNr9  r:  TTT1   )r   r9  Nr   r   NNNNr;  TNr   Nr   )4r
  r*  r   r   dataclassesr   typingr   r   r   r   numpyr   r   torch.utils.checkpointutilsr   	diffusersr   r	   r
   >diffusers.pipelines.stable_diffusion.pipeline_stable_diffusionr   transformers.utilsr   corer   sd_utilsr   peftr   
peft.utilsr   r   r$   r&   rW   rg   FloatTensorintfloatr8  r3   r   r   r   	Generatordictr   r   r   r   r    r!   r"   <module>rN     s    	   ! 1 1   + + R R \ 0  5 4   &   Q" Q"hk ( %*/3CI##CI CI 	CI
 
CI #CI %++,CI CIP  /3 !7;+,IM+/15:>!&HL7;!'MAU3S	>*+MA SMMA C=	MA
 MA MA eCcN34MA $C=MA 
MA eootEOO/DDEFMA e''(MA E--.MA %U%6%67MA #MA MA  xc5+<+< =t CDE!MA" #MA$ %T#s(^4%MA& 'MA MAd /3 !#$( $'),37;+,IM+/15:>!&HL7;!1EAU3S	>*+EA SMEA C=	EA
 EA EA EA "EA EA "%EA %*EA eCcN34EA $C=EA 
EA eootEOO/DDEFEA  e''(!EA" E--.#EA$ %U%6%67%EA& #'EA( )EA* xc5+<+< =t CDE+EA, -EA. %T#s(^4/EA0 1EAPxF)D xFr!   