
    bi                     
   d dl Z d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlmZmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' ddl(m)Z) ddl*m+Z+m,Z,  e%       rd dl-m.c m/Z0 dZ1ndZ1 e&jd                  e3      Z4 e#       rd dl5m6Z6  e$       rd dl7Z7dZ8	 	 	 	 ddee9   deee:ejv                  f      dee
e9      dee
e<      fdZ= G d de+      Z> G d de>      Z?y)    N)CallableDictListOptionalTupleUnion)GemmaPreTrainedModelGemmaTokenizerGemmaTokenizerFast   )MultiPipelineCallbacksPipelineCallback)VaeImageProcessor)AutoencoderKL)get_2d_rotary_pos_embed_lumina)LuminaNextDiT2DModel)FlowMatchEulerDiscreteScheduler)BACKENDS_MAPPING	deprecateis_bs4_availableis_ftfy_availableis_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )DiffusionPipelineImagePipelineOutputTF)BeautifulSoupaA  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import LuminaPipeline

        >>> pipe = LuminaPipeline.from_pretrained("Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16)
        >>> # Enable memory optimizations.
        >>> pipe.enable_model_cpu_offload()

        >>> prompt = "Upper body of a young woman in a Victorian-era outfit with brass goggles and leather straps. Background shows an industrial revolution cityscape with smoky skies and tall, metal structures"
        >>> image = pipe(prompt).images[0]
        ```
num_inference_stepsdevice	timestepssigmasc                    ||t        d      |dt        t        j                  | j                        j
                  j                               v }|st        d| j                   d       | j                  d
||d| | j                  }t        |      }||fS |dt        t        j                  | j                        j
                  j                               v }|st        d| j                   d       | j                  d
||d| | j                  }t        |      }||fS  | j                  |fd	|i| | j                  }||fS )a  
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
            must be `None`.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        timesteps (`List[int]`, *optional*):
            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
            `num_inference_steps` and `sigmas` must be `None`.
        sigmas (`List[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.

    Returns:
        `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
    zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr"   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r"   r!   r#   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r#   r!   r!    )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r"   len)	schedulerr    r!   r"   r#   kwargsaccepts_timestepsaccept_sigmass           e/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/pipelines/lumina/pipeline_lumina.pyretrieve_timestepsr4   M   s   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	 	M)FMfM''	!)n ))) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	 	GvfGG''	!)n ))) 	 	 3MFMfM''	)))    c            3       $    e Zd ZdZ ej
                  d      Zg ZdZddgZ	de
deded	ed
eeef   f
 fdZ	 	 	 	 d7deeee   f   dedeej.                     dee   dee   f
dZ	 	 	 	 	 	 	 	 	 d8deeee   f   dedeeee   f   dedeej.                     deej4                     deej4                     deej4                     deej4                     defdZd Z	 	 	 	 	 d9dZd:dZd Zd;dZ e!d         Z"e!d!        Z#e!d"        Z$ ejJ                          e&e'      dddd#d$dddddddddd%ddd&d'dddgfdeeee   f   d(ee   d)ee   d*ed+e(deeee   f   d,ee(   dee   d-eeejR                  eejR                     f      deej4                     deej4                     deej4                     deej4                     deej4                     d.ee   d/eded0ed1ee(   d2ee   d3eee*eee+gdf   e,e-f      d4ee   d5ee.e/f   f.d6              Z0 xZ1S )<LuminaPipelinea  
    Pipeline for text-to-image generation using Lumina-T2I.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

    Args:
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
        text_encoder ([`GemmaPreTrainedModel`]):
            Frozen Gemma text-encoder.
        tokenizer (`GemmaTokenizer` or `GemmaTokenizerFast`):
            Gemma tokenizer.
        transformer ([`Transformer2DModel`]):
            A text conditioned `Transformer2DModel` to denoise the encoded image latents.
        scheduler ([`SchedulerMixin`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    u5   [#®•©™&@·º½¾¿¡§~\)\(\]\[\}\{\|\\/\*]{1,}ztext_encoder->transformer->vaelatentsprompt_embedstransformerr/   vaetext_encoder	tokenizerc                 Z   t         |           | j                  |||||       d| _        t	        | j                        | _        d| _        t        | d      r,| j                   | j                  j                  j                  nd| _        | j                  | j                  z  | _        y )N)r;   r<   r=   r:   r/      )vae_scale_factor   r:      )super__init__register_modulesr@   r   image_processormax_sequence_lengthhasattrr:   configsample_sizedefault_sample_sizedefault_image_size)selfr:   r/   r;   r<   r=   r-   s         r3   rD   zLuminaPipeline.__init__   s     	%# 	 	
 !"0$BWBWX#&  t]+0@0@0L ##// 	 
 #'":":T=R=R"Rr5      Npromptnum_images_per_promptr!   clean_caption
max_lengthc                 j   |xs | j                   }t        |t              r|gn|}t        |      }| j	                  ||      }| j                  |d| j                  ddd      }|j                  j                  |      }| j                  |dd      j                  j                  |      }	|	j                  d   |j                  d   k\  rlt        j                  ||	      sV| j
                  j                  |	d d | j                  d	z
  df         }
t        j                  d
| j                   d|
        |j                  j                  |      }| j!                  ||d      }|j"                  d   }| j                   | j                   j$                  }n%| j&                  | j&                  j$                  }nd }|j                  ||      }|j                  \  }}}|j)                  d	|d	      }|j+                  ||z  |d      }|j)                  |d	      }|j+                  ||z  d      }||fS )N)rQ   r?   Tpt)pad_to_multiple_ofrR   
truncationpaddingreturn_tensorslongest)rW   rX   rN   z]The following part of your input was truncated because Gemma can only handle sequences up to z	 tokens: attention_maskoutput_hidden_statesdtyper!   )_execution_device
isinstancestrr.   _text_preprocessingr=   rG   	input_idstoshapetorchequalbatch_decodeloggerwarningr\   r<   hidden_statesr`   r:   repeatview)rM   rO   rP   r!   rQ   rR   
batch_sizetext_inputstext_input_idsuntruncated_idsremoved_textprompt_attention_maskr9   r`   _seq_lens                   r3   _get_gemma_prompt_embedsz'LuminaPipeline._get_gemma_prompt_embeds   sM    1411'4&&[
))&)Nnn // % 
 %..11&9..SW.Xbbeeflm  $(<(<R(@@UcetIu>>66q$JbJbefJfikJkGk7lmLNN,,-Y|nF
 !, : : = =f E))+@W[ * 
 &33B7(%%++E)$$**EE%((uV(D%++7A%,,Q0EqI%**:8M+MwXZ[ 5 < <=RTU V 5 : ::H];]_a b333r5   Tdo_classifier_free_guidancenegative_promptnegative_prompt_embedsru   negative_prompt_attention_maskc                    || j                   }t        |t              r|gn|}|t        |      }n|j                  d   }|| j                  ||||
      \  }}|r|||nd}t        |t              r||gz  n|}|:t        |      t        |      ur$t        dt        |       dt        |       d      t        |t              r|g}n/|t        |      k7  r!t        d| dt        |       d	| d| d
	      |j                  d   }| j                  |d|dd      }|j                  j                  |      }|j                  j                  |      }	| j                  ||	d      }| j                  j                  }|j                  d   }|j                  \  }}}|j                  ||      }|j!                  d|d      }|j#                  ||z  |d      }|	j!                  |d      }	|	j#                  ||z  d      }	||||	fS )af  
        Encodes the prompt into text encoder hidden states.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt not to guide the image generation. If not defined, one has to pass `negative_prompt_embeds`
                instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`). For
                Lumina-T2I, this should be "".
            do_classifier_free_guidance (`bool`, *optional*, defaults to `True`):
                whether to use classifier free guidance or not
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                number of images that should be generated per prompt
            device: (`torch.device`, *optional*):
                torch device to place the resulting embeddings on
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. For Lumina-T2I, it's should be the embeddings of the "" string.
            clean_caption (`bool`, defaults to `False`):
                If `True`, the function will preprocess and clean the provided caption before encoding.
            max_sequence_length (`int`, defaults to 256): Maximum sequence length to use for the prompt.
        r   )rO   rP   r!   rQ    z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.rN   rR   TrT   )rW   rR   rV   rX   r[   r^   r_   rZ   )ra   rb   rc   r.   rg   rx   type	TypeErrorr&   r=   re   rf   r\   r<   r`   rm   rn   ro   )rM   rO   ry   rz   rP   r!   r9   r{   ru   r|   rQ   r0   rp   prompt_max_lengthnegative_text_inputsnegative_text_input_idsnegative_dtyperv   rw   s                      r3   encode_promptzLuminaPipeline.encode_prompt  s   N >++F'4&&VJ&,,Q/J 373P3P&;+	 4Q 40M0 '+A+I1@1LoRTO AK?\_@`jO+<<fuO!d6l$:O&OUVZ[jVkUl mV~Q(  OS1#2"3s?33 )/)::J3K_J` ax/
| <33  !. 3 3A 6#'>>$,# $2 $  ';&D&D&G&G&O#-A-P-P-S-STZ-[*%)%6%6'=%) &7 &" "..44N%;%I%I"%M"288MAw%;%>%>^\b%>%c"%;%B%B1F[]^%_"%;%@%@NcAcelnp%q"-K-R-RShjk-l*-K-P-P22B.* 35KMkkkr5   c                 V   dt        t        j                  | j                  j                        j
                  j                               v }i }|r||d<   dt        t        j                  | j                  j                        j
                  j                               v }|r||d<   |S )Neta	generator)r'   r(   r)   r/   stepr+   r,   )rM   r   r   accepts_etaextra_step_kwargsaccepts_generators         r3   prepare_extra_step_kwargsz(LuminaPipeline.prepare_extra_step_kwargst  s     s7#4#4T^^5H5H#I#T#T#Y#Y#[\\'*e$ (3w/@/@ATAT/U/`/`/e/e/g+hh-6k*  r5   c
           
          | j                   dz  z  dk7  s| j                   dz  z  dk7  r"t        d j                   dz   d| d| d      |	Lt         fd|	D              s8t        d j                   d	|	D 
cg c]  }
|
 j                  vs|
 c}
       ||t        d
| d| d      ||t        d      |7t	        |t
              s't	        |t              st        dt        |             ||t        d
| d| d      ||t        d| d| d      ||t        d      ||t        d      |||j                  |j                  k7  r&t        d|j                   d|j                   d      |j                  |j                  k7  r&t        d|j                   d|j                   d      y y y c c}
w )Nr   r   z-`height` and `width` have to be divisible by z	 but are z and r   c              3   :   K   | ]  }|j                   v   y wN)_callback_tensor_inputs).0krM   s     r3   	<genexpr>z.LuminaPipeline.check_inputs.<locals>.<genexpr>  s#      F
23A---F
s   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z and `negative_prompt_embeds`: z'Cannot forward both `negative_prompt`: zEMust provide `prompt_attention_mask` when specifying `prompt_embeds`.zWMust provide `negative_prompt_attention_mask` when specifying `negative_prompt_embeds`.zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` z`prompt_attention_mask` and `negative_prompt_attention_mask` must have the same shape when passed directly, but got: `prompt_attention_mask` z% != `negative_prompt_attention_mask` )	r@   r&   allr   rb   rc   listr   rg   )rM   rO   heightwidthrz   r9   r{   ru   r|   "callback_on_step_end_tensor_inputsr   s   `          r3   check_inputszLuminaPipeline.check_inputs  s    T**Q./14AVAVYZAZ8[_`8`?@U@UXY@Y?ZZcdjckkpqvpwwxy  .9# F
7YF
 C
 DTEaEaDbbn  |^  pHvw  bc  ko  kG  kG  bGpq  pH  oI  J  -";08N}o ^0 0  ^ 5w  FC)@TZ\`IaQRVW]R^Q_`aa"8"D0 9*++]_ 
 &+A+M9/9J K*++]_ 
 $)>)Fdee!-2P2Xvww$)?)K""&<&B&BB --:-@-@,A B.445Q8 
 %**.L.R.RR 55J5P5P4Q R6<<=Q@  S *L$C pHs   ;G	G	c                     rOt               sEt        j                  t        d   d   j	                  d             t        j                  d       drOt               sEt        j                  t        d   d   j	                  d             t        j                  d       dt        |t        t        f      s|g}dt        f fd}|D cg c]
  } ||       c}S c c}w )	Nbs4rZ   zSetting `clean_caption=True`z#Setting `clean_caption` to False...Fftfytextc                     r$j                  |       } j                  |       } | S | j                         j                         } | S r   )_clean_captionlowerstrip)r   rQ   rM   s    r3   processz3LuminaPipeline._text_preprocessing.<locals>.process  sH    **40**40 K zz|))+Kr5   )
r   rk   rl   r   formatr   rb   tupler   rc   )rM   r   rQ   r   ts   ` `  r3   rd   z"LuminaPipeline._text_preprocessing  s    !1!3NN+E226==>\]^NN@A!M!2!4NN+F3B7>>?]^_NN@A!M$.6D	# 	 %))q
)))s   C!c                 	   t        |      }t        j                  |      }|j                         j	                         }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        |d      j                  }t        j                  dd|      }t        j                  d	d|      }t        j                  d
d|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }t        j                  d d|      }t        j                  d!d|      }t        j                  | j                  d|      }t        j                  d"d|      }t        j                  d#      }t        t        j                  ||            d$kD  rt        j                  |d|      }t        j                  |      }t        j                   t        j                   |            }t        j                  d%d|      }t        j                  d&d|      }t        j                  d'd|      }t        j                  d(d|      }t        j                  d)d|      }t        j                  d*d|      }t        j                  d+d|      }t        j                  d,d|      }t        j                  d-d|      }t        j                  d.d|      }t        j                  d/d0|      }t        j                  d1d2|      }t        j                  d3d|      }|j                          t        j                  d4d5|      }t        j                  d6d|      }t        j                  d7d|      }t        j                  d8d|      }|j                         S )9Nz<person>personzk\b((?:https?:(?:\/{1,3}|[a-zA-Z0-9%])|[a-zA-Z0-9.\-]+[.](?:com|co|ru|net|org|edu|gov|it)[\w/-]*\b\/?(?!@)))r~   zh\b((?:www:(?:\/{1,3}|[a-zA-Z0-9%])|[a-zA-Z0-9.\-]+[.](?:com|co|ru|net|org|edu|gov|it)[\w/-]*\b\/?(?!@)))zhtml.parser)featuresz
@[\w\d]+\bz[\u31c0-\u31ef]+z[\u31f0-\u31ff]+z[\u3200-\u32ff]+z[\u3300-\u33ff]+z[\u3400-\u4dbf]+z[\u4dc0-\u4dff]+z[\u4e00-\u9fff]+z|[\u002D\u058A\u05BE\u1400\u1806\u2010-\u2015\u2E17\u2E1A\u2E3A\u2E3B\u2E40\u301C\u3030\u30A0\uFE31\uFE32\uFE58\uFE63\uFF0D]+-u   [`´«»“”¨]"u   [‘’]'z&quot;?z&ampz"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} z\d:\d\d\s+$z\\nz
#\d{1,3}\bz	#\d{5,}\bz
\b\d{6,}\bz0[\S]+\.(?:png|jpg|jpeg|bmp|webp|eps|pdf|apk|mp4)z
[\"\']{2,}z[\.]{2,}z\s+\.\s+z	(?:\-|\_)r   z\b[a-zA-Z]{1,3}\d{3,15}\bz\b[a-zA-Z]+\d+[a-zA-Z]+\bz\b\d+[a-zA-Z]+\d+\bz!(worldwide\s+)?(free\s+)?shippingz(free\s)?download(\sfree)?z\bclick\b\s(?:for|on)\s\w+z9\b(?:png|jpg|jpeg|bmp|webp|eps|pdf|apk|mp4)(\simage[s]?)?z\bpage\s+\d+\bz*\b\d*[a-zA-Z]+\d+[a-zA-Z]+\d+[a-zA-Z\d]*\bu   \b\d+\.?\d*[xх×]\d+\.?\d*\bz
\b\s+\:\s+z: z(\D[,\./])\bz\1 z\s+z^[\"\']([\w\W]+)[\"\']$z\1z^[\'\_,\-\:;]z[\'\_,\-\:\-\+]$z^\.\S+$)rc   ulunquote_plusr   r   resubr   r   bad_punct_regexcompiler.   findallr   fix_texthtmlunescape)rM   captionregex2s      r3   r   zLuminaPipeline._clean_caption  s   g,//'*--/'')&&Xw7&&z

 &&w
  -@EE &&G4 &&,b':&&,b':&&,b':&&,b':&&,b':&&,b':&&,b': && L
 &&-sG<&&c73 &&R1&&"g. &&>WM &&W5 &&g. &&G4&&r73&&G4&&LbRYZ &&g6&&dG4&&--tW=&&dG4 L)rzz&'*+a/ffVS'2G--(--g 67&&5r7C&&5r7C&&/W=&&=r7K&&6GD&&6GD&&UWY[bc&&*B8&&FgV&&92wG&&w7&&&':&&g.&&3UGD&&)38&&,c7;&&R1}}r5   c	                 $   ||t        |      | j                  z  t        |      | j                  z  f}	t        |t              r)t	        |      |k7  rt        dt	        |       d| d      |t        |	|||      }|S |j                  |      }|S )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r   r!   r`   )intr@   rb   r   r.   r&   r   rf   )
rM   rp   num_channels_latentsr   r   r`   r!   r   r8   rg   s
             r3   prepare_latentszLuminaPipeline.prepare_latentsU  s     K4000J$///	
 i&3y>Z+GA#i.AQ R&<'gi 
 ?"5IfTYZG  jj(Gr5   c                     | j                   S r   _guidance_scalerM   s    r3   guidance_scalezLuminaPipeline.guidance_scalei  s    ###r5   c                      | j                   dkD  S )NrN   r   r   s    r3   ry   z*LuminaPipeline.do_classifier_free_guidancep  s    ##a''r5   c                     | j                   S r   )_num_timestepsr   s    r3   num_timestepszLuminaPipeline.num_timestepst  s    """r5      g      @pilrA         ?r   r   r    r   r#   r   output_typereturn_dictrG   scaling_watershedproportional_attncallback_on_step_endr   returnc                    |xs | j                   | j                  z  }|xs | j                   | j                  z  }| j                  |||||||||	       || _        i }|t	        |t
              rd}n-|t	        |t              rt        |      }n|j                  d   }|r| j                  dz  dz  |d<   t        j                  ||z  | j                  dz  z        }| j                  }|dkD  }| j                  |||||||||||	      \  }}}}|r2t        j                  ||gd
      }t        j                  ||gd
      }t!        | j"                  |||      \  }}| j$                  j&                  j(                  }| j+                  ||z  ||||j,                  ||	|
      }
t        |      | _        | j1                  |      5 }t3        |      D ]J  \  }} |rt        j                  |
gdz        n|
}!| }"t        j4                  |"      s|!j6                  j8                  dk(  }#|!j6                  j8                  dk(  }$t	        |"t:              r%|#s|$rt        j<                  nt        j>                  }%n$|#s|$rt        j@                  nt        jB                  }%t        jD                  |"g|%|!j6                        }"n6t        |"j                        dk(  r|"d   jG                  |!j6                        }"|"jI                  |!j                  d         }"d|"| j"                  j&                  jJ                  z  z
  }"|"d   |k  r|}&d}'nd}&|}'tM        | j$                  jN                  dd|&|'      }(| j%                  |!|"|||(|d      d   })|)jQ                  dd
      d   })|r|)ddddf   |)ddddf   }+}*t        jR                  |*t        |*      dz  d
      \  },}-|-||,|-z
  z  z   }.t        j                  |.|.gd
      }*t        j                  |*|+gd
      })|)jQ                  dd
      \  })}/|
j,                  }0|) })| j"                  jU                  |)| |
d      d   }
|
j,                  |0k7  r9t        jV                  jX                  j[                         r|
jG                  |0      }
|j]                          |Hi }1|D ]  }2t_               |2   |1|2<     || || |1      }3|3ja                  d|
      }
|3ja                  d|      }tb        s7te        jf                          M 	 ddd       |dk(  sa|
| jh                  j&                  jj                  z  }
| jh                  jm                  |
d      d   }4| jn                  jq                  |4|      }4n|
}4| js                          |s|4fS tu        |4      S # 1 sw Y   xY w)u  
        Function invoked when calling the pipeline for generation.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
                instead.
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            num_inference_steps (`int`, *optional*, defaults to 30):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            sigmas (`List[float]`, *optional*):
                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                will be used.
            guidance_scale (`float`, *optional*, defaults to 4.0):
                Guidance scale as defined in [Classifier-Free Diffusion
                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
                the text `prompt`, usually at the expense of lower image quality.
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                The number of images to generate per prompt.
            height (`int`, *optional*, defaults to self.unet.config.sample_size):
                The height in pixels of the generated image.
            width (`int`, *optional*, defaults to self.unet.config.sample_size):
                The width in pixels of the generated image.
            eta (`float`, *optional*, defaults to 0.0):
                Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
                applies to [`schedulers.DDIMScheduler`], will be ignored for others.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor will ge generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            prompt_attention_mask (`torch.Tensor`, *optional*): Pre-generated attention mask for text embeddings.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. For Lumina-T2I this negative prompt should be "". If not
                provided, negative_prompt_embeds will be generated from `negative_prompt` input argument.
            negative_prompt_attention_mask (`torch.Tensor`, *optional*):
                Pre-generated attention mask for negative text embeddings.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
            clean_caption (`bool`, *optional*, defaults to `True`):
                Whether or not to clean the caption before creating embeddings. Requires `beautifulsoup4` and `ftfy` to
                be installed. If the dependencies are not installed, the embeddings will be created from the raw
                prompt.
            max_sequence_length (`int` defaults to 120):
                Maximum sequence length to use with the `prompt`.
            callback_on_step_end (`Callable`, *optional*):
                A function that calls at the end of each denoising steps during the inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
                `callback_on_step_end_tensor_inputs`.
            callback_on_step_end_tensor_inputs (`List`, *optional*):
                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                `._callback_tensor_inputs` attribute of your pipeline class.

        Examples:

        Returns:
            [`~pipelines.ImagePipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
                returned where the first element is a list with the generated images
        )r9   r{   ru   r|   r   NrN   r      r   base_sequence_lengthr   )	rz   rP   r!   r9   r{   ru   r|   rQ   rG   )dim)r#   )totalmpsnpur_   i  )linear_factor
ntk_factorF)rm   timestepencoder_hidden_statesencoder_maskimage_rotary_embcross_attention_kwargsr   r   )r   r8   r9   latent)r   )images);rK   r@   r   r   rb   rc   r   r.   rg   rL   mathsqrtra   r   rh   catr4   r/   r:   rI   in_channelsr   r`   r   progress_bar	enumerate	is_tensorr!   r   floatfloat32float64int32int64tensorrf   expandnum_train_timestepsr   head_dimchunksplitr   backendsr   is_availableupdatelocalspopXLA_AVAILABLExm	mark_stepr;   scaling_factordecoderF   postprocessmaybe_free_model_hooksr   )5rM   rO   r   r   r    r   rz   r#   rP   r   r8   r9   r{   ru   r|   r   r   rQ   rG   r   r   r   r   r   rp   r   r!   ry   r"   latent_channelsr   ir   latent_model_inputcurrent_timestepis_mpsis_npur`   r   r   r   
noise_prednoise_pred_epsnoise_pred_restnoise_pred_cond_epsnoise_pred_uncond_epsnoise_pred_halfrv   latents_dtypecallback_kwargsr   callback_outputsimages5                                                        r3   __call__zLuminaPipeline.__call__x  s$   T K433d6K6KKI11D4I4II 	'#9"7+I/Q 	 
	
  .!# *VS"9JJvt$<VJ&,,Q/J>B>U>UY[>[`a=a"#9:56>D4K4KQ4N#NO''
 '5s&:# '+"7'#9"7+I' 3  
	
!"* '!II}6L&MSTUM$)II/DFd.ekl$m! *<DNNL_agpv)w&	& **11==&&..	
 ")n %89 b	#\!), a#1A\UYYy1}%=bi"#$ '78 066;;uDF/66;;uDF!"2E:28F06&u{{',||)*#188($
 )//0A5'7'='@'@ASAZAZ'[$#3#:#:;M;S;STU;V#W  $%'7$..:O:O:c:c'c#c  $A&)::$2M!$J$'M!/J#A$$--"/)$  "--"4-*7!6%5+A % .  
 (--aQ-7:
 /6@BQB6GTUWXWYTYIZONAF&N(;q(@aB>')> '<n+.CC? 'O &+YY/QWX%YN!&NO+LRS!TJ$.$4$4QA$4$>MJ !(([
..--j!WRW-XYZ[==M1~~))668")**]";##%'3&(O? 9-3Xa[*9';D!Q'X$.229gFG$4$8$8-$XM LLNCa#b	#H h& > >>GHHOOGO?BE((44U4TEE 	##%8O"%00cb	# b	#s   MV4V44V=)rN   NFN)	TNrN   NNNNNF)NNNNN)Fr   )2__name__
__module____qualname____doc__r   r   r   _optional_componentsmodel_cpu_offload_seqr   r   r   r   r	   r   r
   r   rD   rc   r   r   r   rh   r!   boolrx   Tensorr   r   r   rd   r   r   propertyr   ry   r   no_gradr   EXAMPLE_DOC_STRINGr   	Generatorr   r   r   r   r   r   r  __classcell__r-   s   @r3   r7   r7      sS   & !bjj	O  <
S)S 3S 	S
 +S );;<S< &')-(-$(54c49n%54  #54 &	54
  ~54 SM54v -115%&)-049=8<AE#klc49n%kl &*kl sDI~.	kl
  #kl &kl  -kl !) 6kl  (5kl )1(>kl kl\!. #"'++/BJ*2pd( $ $ ( ( # # U]]_12 )-# $#% #15"/0MQ*.049=8<AE%* "#&-0,0 9B3k1c49n%k1 }k1 	k1
 !k1 k1 sDI~.k1 Uk1  (}k1 E%//43H"HIJk1 %,,'k1  -k1 !) 6k1  (5k1 )1(>k1  c]!k1" #k1$ %k1& !'k1( $E?)k1* $D>+k1, '(Cd+T124DF\\]
-k12 -1I3k14 
"E)	*5k1 3 k1r5   r7   c                   >     e Zd Zdededededeee	f   f
 fdZ
 xZS )LuminaText2ImgPipeliner:   r/   r;   r<   r=   c                 L    d}t        dd|       t        | 	  |||||       y )Nz`LuminaText2ImgPipeline` has been renamed to `LuminaPipeline` and will be removed in a future version. Please use `LuminaPipeline` instead.zAdiffusers.pipelines.lumina.pipeline_lumina.LuminaText2ImgPipelinez0.34)r:   r/   r;   r<   r=   )r   rC   rD   )rM   r:   r/   r;   r<   r=   deprecation_messager-   s          r3   rD   zLuminaText2ImgPipeline.__init__  s<     lUW]_rs#% 	 	
r5   )r  r  r  r   r   r   r	   r   r
   r   rD   r  r  s   @r3   r  r    sH    
)
 3
 	

 +
 );;<
 
r5   r  )NNNN)@r   r(   r   r   urllib.parseparser   typingr   r   r   r   r   r   rh   transformersr	   r
   r   	callbacksr   r   rF   r   modelsr   models.embeddingsr   $models.transformers.lumina_nextdit2dr   
schedulersr   utilsr   r   r   r   r   r   r   utils.torch_utilsr   pipeline_utilsr   r   torch_xla.core.xla_modelcore	xla_modelr   r   
get_loggerr  rk   r   r   r   r  r   rc   r!   r   r4   r7   r  r%   r5   r3   <module>r2     s      	  ? ?  Q Q A 0 # ? H 9   . C ))MM			H	% ! & *.15%)$(8*!#8* U3,-.8* S	"	8*
 T%[!8*v]1& ]1@
^ 
r5   