
    biuL                        d dl mZmZ d dlZd dlmZ d dlmc mZ ddl	m
Z
 ddlmZ ddlmZ  G d d	ej                        Z G d
 dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z	 	 	 ddej*                  dej*                  dededeeef   dej*                  fdZ	 	 	 ddej*                  deej*                     dededej*                  f
dZy)    )OptionalTupleN   )	deprecate)is_torch_version   )RMSNormc                        e Zd ZdZ	 	 	 	 ddedededee   def
 fdZde	j                  d	e	j                  fd
Z xZS )
Upsample1Da  A 1D upsampling layer with an optional convolution.

    Parameters:
        channels (`int`):
            number of channels in the inputs and outputs.
        use_conv (`bool`, default `False`):
            option to use a convolution.
        use_conv_transpose (`bool`, default `False`):
            option to use a convolution transpose.
        out_channels (`int`, optional):
            number of output channels. Defaults to `channels`.
        name (`str`, default `conv`):
            name of the upsampling 1D layer.
    channelsuse_convuse_conv_transposeout_channelsnamec                 >   t         |           || _        |xs || _        || _        || _        || _        d | _        |r)t        j                  || j                  ddd      | _        y |r3t        j                  | j                  | j                  dd      | _        y y )N   r   r      )padding)super__init__r   r   r   r   r   convnnConvTranspose1dConv1d)selfr   r   r   r   r   	__class__s         V/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/upsampling.pyr   zUpsample1D.__init__*   s     	 (4H "4		**8T5F5F1aPDI		$--1B1BAqQDI     inputsreturnc                     |j                   d   | j                  k(  sJ | j                  r| j                  |      S t	        j
                  |dd      }| j                  r| j                  |      }|S )Nr          @nearestscale_factormode)shaper   r   r   Finterpolater   )r   r   outputss      r   forwardzUpsample1D.forward?   s`    ||A$--///""99V$$--SyI==ii(Gr   )FFNr   __name__
__module____qualname____doc__intboolr   strr   torchTensorr+   __classcell__r   s   @r   r   r      sp    $ #(&*RR R !	R
 smR R*
ell 
u|| 
r   r   c                        e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 ddedededee   dedee   f fdZdd	e	j                  d
ee   de	j                  fdZ xZS )
Upsample2Da  A 2D upsampling layer with an optional convolution.

    Parameters:
        channels (`int`):
            number of channels in the inputs and outputs.
        use_conv (`bool`, default `False`):
            option to use a convolution.
        use_conv_transpose (`bool`, default `False`):
            option to use a convolution transpose.
        out_channels (`int`, optional):
            number of output channels. Defaults to `channels`.
        name (`str`, default `conv`):
            name of the upsampling 2D layer.
    r   r   r   r   r   kernel_sizec                    t         |           || _        |xs || _        || _        || _        || _        || _        |dk(  rt        j                  ||	|
      | _
        n0|dk(  rt        ||	|
      | _
        n|d | _
        nt        d|       d }|r*|d}t        j                  || j                  |d||      }n4|r2|d}t        j                  | j                  | j                  |||      }|d	k(  r|| _        y || _        y )
Nln_normrms_normzunknown norm_type: r   r   )r:   strider   biasr   )r:   r   r?   r   )r   r   r   r   r   r   r   r)   r   	LayerNormnormr	   
ValueErrorConvTranspose2dConv2dr   Conv2d_0)r   r   r   r   r   r   r:   r   	norm_typeepselementwise_affiner?   r)   r   r   s                 r   r   zUpsample2D.__init__\   s    	 (4H "4	&	!Xs4FGDI*$#/ABDIDI29+>??"%%$++QX_fjD "99T]]D,=,=;`gnrsD 6>DI DMr   hidden_statesoutput_sizer    c                 t   t        |      dkD  s|j                  dd       d}t        dd|       |j                  d   | j                  k(  sJ | j
                  5| j                  |j                  dddd            j                  dddd      }| j                  r| j                  |      S |j                  }|t        j                  k(  r+t        dd	      r|j                  t        j                        }|j                  d   d
k\  r|j                         }| j                   r|dn7t#        t%        ||j                  dd        D cg c]
  \  }}||z   c}}      }	|j'                         |	z  t)        dd      kD  r|j                         }|t+        j                   |dd      }nt+        j                   ||d      }|t        j                  k(  rt        dd	      r|j                  |      }| j,                  r3| j.                  dk(  r| j                  |      }|S | j1                  |      }|S c c}}w )Nr   scalezThe `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`.z1.0.0r   r   r   <z2.1@      r"   r#   r$   )sizer&   r   )lengetr   r'   r   rA   permuter   r   dtyper4   bfloat16r   tofloat32
contiguousr)   maxzipnumelpowr(   r   r   rE   )
r   rI   rJ   argskwargsdeprecation_messagerU   fsr%   s
             r   r+   zUpsample2D.forward   s   t9q=FJJw5A #Ugw(;<""1%66699  IIm&;&;Aq!Q&GHPPQRTUWXZ[\M""99]++ ##ENN"'7U'C),,U]];M q!R')446M  !(cSVcViVijljmVnEo2pTQ1q52p.q  ""$|3c!Rj@ - 8 8 :" !m#T] ^ !m+T] ^ ENN"'7U'C),,U3M ==yyF" $		- 8  !%m <+ 3qs   	H4
)FFNr   Nr   NNNTT)Nr,   r7   s   @r   r9   r9   L   s    $ #(&*%)0!0! 0! !	0!
 sm0! 0! c]0!d2U\\ 2 2jojvjv 2r   r9   c                       e Zd ZdZ	 	 	 	 ddee   dee   dedeeeeef   f fdZ	 	 	 	 dde	j                  dee	j                     d	ee	j                     d
edede	j                  fdZde	j                  de	j                  fdZ xZS )FirUpsample2Da  A 2D FIR upsampling layer with an optional convolution.

    Parameters:
        channels (`int`, optional):
            number of channels in the inputs and outputs.
        use_conv (`bool`, default `False`):
            option to use a convolution.
        out_channels (`int`, optional):
            number of output channels. Defaults to `channels`.
        fir_kernel (`tuple`, default `(1, 3, 3, 1)`):
            kernel for the FIR filter.
    r   r   r   
fir_kernelc                     t         |           |r|n|}|rt        j                  ||ddd      | _        || _        || _        || _        y )Nr   r   r:   r>   r   )r   r   r   rD   rE   r   re   r   )r   r   r   r   re   r   s        r   r   zFirUpsample2D.__init__   sL     	'3|IIh!TU_`aDM $(r   rI   weightkernelfactorgainr    c                    t        |t              r|dk\  sJ |dg|z  }t        j                  |t        j                        }|j
                  dk(  rt        j                  ||      }|t        j                  |      z  }|||dz  z  z  }| j                  r|j                  d   }|j                  d   }|j                  d   }|j                  d   |z
  |dz
  z
  }	||f}
|j                  d   dz
  |z  |z   |j                  d   dz
  |z  |z   f}|d   |j                  d   dz
  |
d   z  z
  |z
  |d   |j                  d   dz
  |
d   z  z
  |z
  f}|d   dk\  r|d   dk\  sJ |j                  d   |z  }t        j                  ||d|||f      }t        j                  |ddg      j                  ddddd      }t        j                  |||z  d||f      }t        j                  |||
|d	      }t        |t        j                  ||j                   
      |	dz   dz  |z   dz
  |	dz  dz   f      }|S |j                  d   |z
  }	t        |t        j                  ||j                   
      ||	dz   dz  |z   dz
  |	dz  f      }|S )a  Fused `upsample_2d()` followed by `Conv2d()`.

        Padding is performed only once at the beginning, not between the operations. The fused op is considerably more
        efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of
        arbitrary order.

        Args:
            hidden_states (`torch.Tensor`):
                Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
            weight (`torch.Tensor`, *optional*):
                Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. Grouped convolution can be
                performed by `inChannels = x.shape[0] // numGroups`.
            kernel (`torch.Tensor`, *optional*):
                FIR filter of the shape `[firH, firW]` or `[firN]` (separable). The default is `[1] * factor`, which
                corresponds to nearest-neighbor upsampling.
            factor (`int`, *optional*): Integer upsampling factor (default: 2).
            gain (`float`, *optional*): Scaling factor for signal magnitude (default: 1.0).

        Returns:
            output (`torch.Tensor`):
                Tensor of the shape `[N, C, H * factor, W * factor]` or `[N, H * factor, W * factor, C]`, and same
                datatype as `hidden_states`.
        r   rU   r   r   r   r   )dims)r>   output_paddingr   device)paduprs   )
isinstancer1   r4   tensorrX   ndimoutersumr   r'   reshapefliprT   r(   conv_transpose2dupfirdn2d_nativerr   )r   rI   rh   ri   rj   rk   convHconvWinC	pad_valuer>   output_shaperp   
num_groupsinverse_convoutputs                   r   _upsample_2dzFirUpsample2D._upsample_2d   s   @ &#&6Q;66 >S6\F fEMM:;;![[0F%))F##4619-.==LLOELLOE,,q/Ca61eai@If%F $$Q'!+v5=$$Q'!+v5=L
 Q=#6#6q#9A#="JJURQ=#6#6q#9A#="JJURN "!$)nQ.?1.DDD&,,Q/36J ]]6JC+NOFZZaV4<<Q1aKF]]6J,<b%+OPF---L &VL,?,?@!m)F2Q6	Q8JKF  Q&0I%VM,@,@A!m)F2Q6	QG	F r   c                    | j                   r_| j                  || j                  j                  | j                        }|| j                  j
                  j                  dddd      z   }|S | j                  || j                  d      }|S )N)ri   r   rn   r   )ri   rj   )r   r   rE   rh   re   r?   r{   )r   rI   heights      r   r+   zFirUpsample2D.forward?  s    ==&&}dmm6J6JSWSbSb&cFdmm0088B1EEF  &&}T__UV&WFr   )NNF)r   r   r   r   )NNr   r   )r-   r.   r/   r0   r   r1   r2   r   r   r4   r5   floatr   r+   r6   r7   s   @r   rd   rd      s     #'&*0<)3-) sm) 	)
 #sC,-)$ *.)-]||] &] &	]
 ] ] 
]~U\\ ell r   rd   c                   b     e Zd ZdZddef fdZdej                  dej                  fdZ xZ	S )KUpsample2DzA 2D K-upsampling layer.

    Parameters:
        pad_mode (`str`, *optional*, default to `"reflect"`): the padding mode to use.
    pad_modec                     t         |           || _        t        j                  g dg      dz  }|j
                  d   dz  dz
  | _        | j                  d|j                  |z  d       y )N)      ?      ?r   r   r   r   ri   F)
persistent)	r   r   r   r4   rw   r'   rs   register_bufferT)r   r   	kernel_1dr   s      r   r   zKUpsample2D.__init__P  sd     LL">!?@1D	??1%*Q.Xy{{Y'>5Qr   r   r    c                 b   t        j                  || j                  dz   dz  fdz  | j                        }|j                  |j                  d   |j                  d   | j
                  j                  d   | j
                  j                  d   g      }t        j                  |j                  d   |j                        }| j
                  j                  |      d d d f   j                  |j                  d   dd      }||||f<   t        j                  ||d| j                  dz  dz         S )Nr   r   r   r   rq   rn   )r>   r   )r(   rs   r   	new_zerosr'   ri   r4   arangerr   rW   expandr}   )r   r   rh   indicesri   s        r   r+   zKUpsample2D.forwardW  s    vA! 3594==I!!QQ!!!$!!!$	
 ,,v||Av}}E'a077QRP#)w !!&&DHHqLSTDTUUr   )reflect)
r-   r.   r/   r0   r3   r   r4   r5   r+   r6   r7   s   @r   r   r   I  s4    R RVell Vu|| Vr   r   c                        e Zd ZdZ	 	 	 	 ddededededededd	f fd
Zdej                  dej                  fdZ	 xZ
S )CogVideoXUpsample3Da  
    A 3D Upsample layer using in CogVideoX by Tsinghua University & ZhipuAI # Todo: Wait for paper release.

    Args:
        in_channels (`int`):
            Number of channels in the input image.
        out_channels (`int`):
            Number of channels produced by the convolution.
        kernel_size (`int`, defaults to `3`):
            Size of the convolving kernel.
        stride (`int`, defaults to `1`):
            Stride of the convolution.
        padding (`int`, defaults to `1`):
            Padding added to all four sides of the input.
        compress_time (`bool`, defaults to `False`):
            Whether or not to compress the time dimension.
    in_channelsr   r:   r>   r   compress_timer    Nc                 n    t         |           t        j                  |||||      | _        || _        y )Nrg   )r   r   r   rD   r   r   )r   r   r   r:   r>   r   r   r   s          r   r   zCogVideoXUpsample3D.__init__z  s2     	IIk<[Y_ipq	*r   r   c                 2   | j                   r |j                  d   dkD  r|j                  d   dz  dk(  rt|d d d d df   |d d d d dd f   }}t        j                  |d      }t        j                  |d      }|d d d d d d d d d f   }t	        j
                  ||gd      }n|j                  d   dkD  rt        j                  |d      }n|j                  d      }t        j                  |d      }|d d d d d d d d d f   }n|j                  \  }}}}}|j                  ddddd      j                  ||z  |||      }t        j                  |d      } |j                  |||g|j                  dd   j                  ddddd      }|j                  \  }}}}}|j                  ddddd      j                  ||z  |||      }| j                  |      } |j                  ||g|j                  dd   j                  ddddd      }|S )	Nr   r   r   r"   )r%   )dimr   r   )
r   r'   r(   r)   r4   catsqueezerT   r{   r   )	r   r   x_firstx_restbcthws	            r   r+   zCogVideoXUpsample3D.forward  s   ||A"v||A':a'?"(Aq/6!Q(3C--cBvC@!!Qa"23GV#4!<a1$vC@*vC@1dAq 01 #LLMAq!Q^^Aq!Q2::1q5!QJF]]6<F#V^^Aq!?fll12.>?GG1aQRTUVF1aA1aA.66q1uaAF6"18v||AB'78@@Aq!QOr   )r   r   r   F)r-   r.   r/   r0   r1   r2   r   r4   r5   r+   r6   r7   s   @r   r   r   g  sz    , #++ + 	+
 + + + 
+ell u|| r   r   rw   ri   ru   downrs   r    c                    |x}}|x}}|d   x}	}
|d   x}}| j                   \  }}}}| j                  d||d      } | j                   \  }}}}|j                   \  }}| j                  d|d|d|      }t        j                  |ddd|dz
  ddd|dz
  g      }|j                  d||z  ||z  |      }t        j                  |ddt        |	d      t        |d      t        |
d      t        |d      g      }|j                  | j                        }|d d t        |
 d      |j                   d   t        | d      z
  t        |	 d      |j                   d   t        | d      z
  d d f   }|j                  dddd      }|j                  dd||z  |
z   |z   ||z  |	z   |z   g      }t        j                  |ddg      j                  dd||      }t        j                  ||      }|j                  d|||z  |
z   |z   |z
  dz   ||z  |	z   |z   |z
  dz         }|j                  dddd      }|d d d d |d d |d d f   }||z  |
z   |z   |z
  |z  dz   }||z  |	z   |z   |z
  |z  dz   }|j                  d|||      S )Nr   r   rn   r   r   )r'   r{   viewr(   rs   rZ   rW   rr   rT   r4   r|   conv2d)rw   ri   ru   r   rs   up_xup_ydown_xdown_ypad_x0pad_y0pad_x1pad_y1_channelin_hin_wminorkernel_hkernel_woutr   out_hout_ws                           r   r~   r~     s    D4FV!fFV!fFV#\\Awd^^BdA.F!<<AtT5Hh
++b$4E
2C
%%aAtaxAq$(;
<C
((2td{D4K
7C
%%aCNCNCNCPVXYN[
\C
&&
C
	VGQ#))A,fWa88VGQ#))A,fWa88		C ++aAq
!C
++r1dTkF2V;TD[6=QTZ=Z[
\C

6Aq6"''1hAA
((3
C
++
tfv%014tfv%014	C ++aAq
!C
a68V8Q&
'CD[6!F*X5&@1DED[6!F*X5&@1DE88B..r   rI   rj   rk   c                    t        |t              r|dk\  sJ |dg|z  }t        j                  |t        j                        }|j
                  dk(  rt        j                  ||      }|t        j                  |      z  }|||dz  z  z  }|j                  d   |z
  }t        | |j                  | j                        ||dz   dz  |z   dz
  |dz  f      }|S )aX  Upsample2D a batch of 2D images with the given filter.
    Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and upsamples each image with the given
    filter. The filter is normalized so that if the input pixels are constant, they will be scaled by the specified
    `gain`. Pixels outside the image are assumed to be zero, and the filter is padded with zeros so that its shape is
    a: multiple of the upsampling factor.

    Args:
        hidden_states (`torch.Tensor`):
            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        kernel (`torch.Tensor`, *optional*):
            FIR filter of the shape `[firH, firW]` or `[firN]` (separable). The default is `[1] * factor`, which
            corresponds to nearest-neighbor upsampling.
        factor (`int`, *optional*, default to `2`):
            Integer upsampling factor.
        gain (`float`, *optional*, default to `1.0`):
            Scaling factor for signal magnitude (default: 1.0).

    Returns:
        output (`torch.Tensor`):
            Tensor of the shape `[N, C, H * factor, W * factor]`
    r   rm   r   r   rq   rt   )rv   r1   r4   rw   rX   rx   ry   rz   r'   r~   rW   rr   )rI   ri   rj   rk   r   r   s         r   upsample_2dr     s    6 fc"v{22~v\\&6F{{aVV,
eiiFtvqy)*FQ&(I		--	.!m!F*Q.	Q?	F Mr   )r   r   )r   r   )Nr   r   )typingr   r   r4   torch.nnr   torch.nn.functional
functionalr(   utilsr   utils.import_utilsr   normalizationr	   Moduler   r9   rd   r   r   r5   r1   r~   r   r    r   r   <module>r      s%   #      1 "/ /dt tnCBII CLV")) V<=")) =F !//LL//LL// 	// 	//
 
sCx// \\//h &*	,<<,U\\", , 	,
 \\,r   