
    bi(`                        d dl Z d dlmZmZmZ d dlZd dlmZ d dlmc m	Z
 ddlmZmZ ddlmZ ddlmZmZ  G d d	ej&                        Z G d
 dej*                        Z G d dej&                        Z G d dej&                        Z G d dej&                        Z G d dej&                        Z G d dej&                        Z G d dej&                        Z G d dej&                        Z G d dej&                        Z G d dej&                        Z G d dej&                        Z  ed d!      rej*                  Zn G d" d#ej&                        Z G d$ d%ej&                        Z! G d& d'ej&                        Z" G d( d)ej&                        Z# G d* d+ej&                        Z$	 	 	 	 	 d3d,e%d-ee&   d.e'd/e(d0e(d1ej&                  fd2Z)y)4    N)DictOptionalTuple   )is_torch_npu_availableis_torch_version   )get_activation)CombinedTimestepLabelEmbeddings)PixArtAlphaCombinedTimestepSizeEmbeddingsc                        e Zd ZdZ	 	 	 	 	 ddedee   dee   dededef fdZ	 dd	e	j                  d
ee	j                     dee	j                     de	j                  fdZ xZS )AdaLayerNorma  
    Norm layer modified to incorporate timestep embeddings.

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        num_embeddings (`int`, *optional*): The size of the embeddings dictionary.
        output_dim (`int`, *optional*):
        norm_elementwise_affine (`bool`, defaults to `False):
        norm_eps (`bool`, defaults to `False`):
        chunk_dim (`int`, defaults to `0`):
    embedding_dimnum_embeddings
output_dimnorm_elementwise_affinenorm_eps	chunk_dimc                 2   t         |           || _        |xs |dz  }|t        j                  ||      | _        nd | _        t        j                         | _        t        j                  ||      | _	        t        j                  |dz  ||      | _        y Nr   )super__init__r   nn	EmbeddingembSiLUsiluLinearlinear	LayerNormnorm)selfr   r   r   r   r   r   	__class__s          Y/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/normalization.pyr   zAdaLayerNorm.__init__)   s}     	"4=1#4
%||NMBDHDHGGI	iiz:LLq(<ST	    xtimesteptembreturnc                 \   | j                   | j                  |      }| j                  | j                  |            }| j                  dk(  r/|j	                  dd      \  }}|d d d d d f   }|d d d d d f   }n|j	                  dd      \  }}| j                  |      d|z   z  |z   }|S )Nr	   r   dimr   )r   r   r   r   chunkr!   )r"   r&   r'   r(   shiftscales         r$   forwardzAdaLayerNorm.forward@   s     8888H%D{{499T?+>>Q  ::aQ:/LE5!T1*%E!T1*%E::aQ:/LE5IIaLAI&.r%   )NNFh㈵>r   )NN)__name__
__module____qualname____doc__intr   boolfloatr   torchTensorr0   __classcell__r#   s   @r$   r   r      s    
 )-$((-UU !U SM	U
 "&U U U0 hl)1%,,)?NVW\WcWcNd	r%   r   c                   D    e Zd Zdej                  dej                  fdZy)FP32LayerNorminputsr)   c                 F   |j                   }t        j                  |j                         | j                  | j
                  | j
                  j                         nd | j                  | j                  j                         nd | j                        j                  |      S N)	dtypeF
layer_normr8   normalized_shapeweightbiasepsto)r"   r?   origin_dtypes      r$   r0   zFP32LayerNorm.forwardV   su    ||||LLN!!#';;#:DKK!%!6DIIOODHH
 "\
	r%   N)r2   r3   r4   r9   r:   r0    r%   r$   r>   r>   U   s    ell u|| r%   r>   c            	            e Zd ZdZddedededdf fdZ	 ddej                  d	e
ej                     deej                  d
f   fdZ xZS )SD35AdaLayerNormZeroXz
    Norm layer adaptive layer norm zero (AdaLN-Zero).

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        num_embeddings (`int`): The size of the embeddings dictionary.
    r   	norm_typerG   r)   Nc                     t         |           t        j                         | _        t        j
                  |d|z  |      | _        |dk(  rt        j                  |dd      | _        y t        d| d      )	N	   rG   rD   Fư>elementwise_affinerH   Unsupported `norm_type` (z-) provided. Supported ones are: 'layer_norm'.
r   r   r   r   r   r   r   r    r!   
ValueErrorr"   r   rN   rG   r#   s       r$   r   zSD35AdaLayerNormZeroX.__init__j   sg    GGI	iiq=/@tL$]uRVWDI8Cpqrrr%   hidden_statesr   .c           	         | j                  | j                  |            }|j                  dd      \	  }}}}}}}	}
}| j                  |      }|d|d d d f   z   z  |d d d f   z   }|d|
d d d f   z   z  |	d d d f   z   }|||||||fS )NrP   r	   r+   r   r   r-   r!   )r"   rY   r   	shift_msa	scale_msagate_msa	shift_mlp	scale_mlpgate_mlp
shift_msa2
scale_msa2	gate_msa2norm_hidden_statesnorm_hidden_states2s                 r$   r0   zSD35AdaLayerNormZeroX.forwardt   s    
 kk$))C.)lolulu1 mv m
i	9h	9h
T^`i "YY}5*a)AtG2D.DE	RSUYRYHZZ0A
1d78K4KLzZ[]aZaObbh	9hH[]fffr%   rD   TrA   )r2   r3   r4   r5   r6   strr7   r   r9   r:   r   r   r0   r;   r<   s   @r$   rM   rM   a   su    sc sc sPT s`d s '+g||g ell#g 
u||S 	!	gr%   rM   c                   N    e Zd ZdZddedee   f fdZ	 	 	 	 ddej                  deej                     deej                     deej                     d	eej                     d
eej                  ej                  ej                  ej                  ej                  f   fdZ xZS )AdaLayerNormZero
    Norm layer adaptive layer norm zero (adaLN-Zero).

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        num_embeddings (`int`): The size of the embeddings dictionary.
    r   r   c                 ^   t         |           |t        ||      | _        nd | _        t	        j
                         | _        t	        j                  |d|z  |      | _        |dk(  rt	        j                  |dd      | _
        y |dk(  rt        |dd      | _
        y t        d	| d
      )N   rQ   rD   FrR   rS   fp32_layer_norm)rT   rG   rU   @) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'.)r   r   r   r   r   r   r   r   r   r    r!   r>   rW   )r"   r   r   rN   rG   r#   s        r$   r   zAdaLayerNormZero.__init__   s    %6~}UDHDHGGI	iiq=/@tL$]uRVWDI++%mTYZDI+I;6vw r%   r&   r'   class_labelshidden_dtyper   r)   c                    | j                   | j                  |||      }| j                  | j                  |            }|j                  dd      \  }}}}	}
}| j	                  |      d|d d d f   z   z  |d d d f   z   }|||	|
|fS )N)rq   rm   r	   r+   )r   r   r   r-   r!   )r"   r&   r'   rp   rq   r   r\   r]   r^   r_   r`   ra   s               r$   r0   zAdaLayerNormZero.forward   s     88((8\(MCkk$))C.)ILSTZ[I\F	9h	9hIIaLA	!T' 223i46HH(Iy(::r%   )NrD   T)NNNN)r2   r3   r4   r5   r6   r   r   r9   r:   
LongTensorrB   r   r0   r;   r<   s   @r$   rj   rj      s    c 8C= * ,037.2&*;<<; 5<<(; u//0	;
 u{{+; ell#; 
u||U\\5<<u||S	T;r%   rj   c                        e Zd ZdZddef fdZ	 d	dej                  deej                     de	ej                  ej                  ej                  ej                  ej                  f   fdZ
 xZS )
AdaLayerNormZeroSinglerk   r   c                     t         |           t        j                         | _        t        j
                  |d|z  |      | _        |dk(  rt        j                  |dd      | _        y t        d| d      )	N   rQ   rD   FrR   rS   rU   ro   rV   rX   s       r$   r   zAdaLayerNormZeroSingle.__init__   sk    GGI	iiq=/@tL$]uRVWDI+I;6vw r%   r&   r   r)   c                     | j                  | j                  |            }|j                  dd      \  }}}| j                  |      d|d d d f   z   z  |d d d f   z   }||fS )Nrw   r	   r+   r[   )r"   r&   r   r\   r]   r^   s         r$   r0   zAdaLayerNormZeroSingle.forward   sk    
 kk$))C.)),1!)<&	9hIIaLA	!T' 223i46HH({r%   rg   rA   r2   r3   r4   r5   r6   r   r9   r:   r   r   r0   r;   r<   s   @r$   ru   ru      sk    
c 
 '+<< ell# 
u||U\\5<<u||S	T	r%   ru   c                        e Zd ZdZdededef fdZ	 d
dej                  de
ej                     deej                  ej                  ej                  ej                  f   fd	Z xZS )LuminaRMSNormZeroz
    Norm layer adaptive RMS normalization zero.

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
    r   r   r   c                     t         |           t        j                         | _        t        j
                  t        |d      d|z  d      | _        t        ||      | _	        y )Ni      TrQ   rH   )
r   r   r   r   r   r   minr   RMSNormr!   )r"   r   r   r   r#   s       r$   r   zLuminaRMSNormZero.__init__   sP    GGI	iit$

 Mx8	r%   r&   r   r)   c                     | j                  | j                  |            }|j                  dd      \  }}}}| j                  |      d|d d d f   z   z  }||||fS )Nr}   r	   r+   r[   )r"   r&   r   r]   r^   r`   ra   s          r$   r0   zLuminaRMSNormZero.forward   sd    
 kk$))C.)3699QA93F0	8YIIaLA	!T' 223(Ix//r%   rA   )r2   r3   r4   r5   r6   r8   r7   r   r9   r:   r   r   r0   r;   r<   s   @r$   r{   r{      st    9c 9U 9UY 9 '+	0<<	0 ell#	0 
u||U\\5<<E	F		0r%   r{   c                       e Zd ZdZddedef fdZ	 	 	 ddej                  de	e
eej                  f      de	e   de	ej                     d	eej                  ej                  ej                  ej                  ej                  f   f
d
Z xZS )AdaLayerNormSingleaT  
    Norm layer adaptive layer norm single (adaLN-single).

    As proposed in PixArt-Alpha (see: https://huggingface.co/papers/2310.00426; Section 2.3).

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        use_additional_conditions (`bool`): To use additional conditions for normalization or not.
    r   use_additional_conditionsc                     t         |           t        ||dz  |      | _        t	        j
                         | _        t	        j                  |d|z  d      | _        y )Nrw   )size_emb_dimr   rm   TrQ   )	r   r   r   r   r   r   r   r   r   )r"   r   r   r#   s      r$   r   zAdaLayerNormSingle.__init__   sO    <(:Vo
 GGI	iiq=/@tLr%   r'   added_cond_kwargs
batch_sizerq   r)   c                     |xs d d d} | j                   |fi |||d}| j                  | j                  |            |fS )N)
resolutionaspect_ratio)r   rq   )r   r   r   )r"   r'   r   r   rq   embedded_timesteps         r$   r0   zAdaLayerNormSingle.forward  sS     .[VZ1[$DHHXu1Buzhtu{{499%678:KKKr%   )F)NNN)r2   r3   r4   r5   r6   r7   r   r9   r:   r   r   rh   rB   r   r0   r;   r<   s   @r$   r   r      s    Mc Md M @D$(.2
L,,
L $Dell):$;<
L SM	
L
 u{{+
L 
u||U\\5<<u||S	T
Lr%   r   c                        e Zd ZdZ	 ddedededee   def
 fdZde	j                  d	e	j                  d
e	j                  fdZ xZS )AdaGroupNorma  
    GroupNorm layer modified to incorporate timestep embeddings.

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        num_embeddings (`int`): The size of the embeddings dictionary.
        num_groups (`int`): The number of groups to separate the channels into.
        act_fn (`str`, *optional*, defaults to `None`): The activation function to use.
        eps (`float`, *optional*, defaults to `1e-5`): The epsilon value to use for numerical stability.
    r   out_dim
num_groupsact_fnrH   c                     t         |           || _        || _        |d | _        nt        |      | _        t        j                  ||dz        | _        y r   )	r   r   r   rH   actr
   r   r   r   )r"   r   r   r   r   rH   r#   s         r$   r   zAdaGroupNorm.__init__  sL     	$>DH%f-DHiiw{;r%   r&   r   r)   c                    | j                   r| j                  |      }| j                  |      }|d d d d d d f   }|j                  dd      \  }}t        j                  || j
                  | j                        }|d|z   z  |z   }|S )Nr   r	   r+   r~   )r   r   r-   rC   
group_normr   rH   )r"   r&   r   r/   r.   s        r$   r0   zAdaGroupNorm.forward(  s~    88((3-Ckk#!Qd"#yyy*uLLDOO:UOe#r%   )Nr1   )r2   r3   r4   r5   r6   r   rh   r8   r   r9   r:   r0   r;   r<   s   @r$   r   r     sg    	 mq< <+.<<?<IQRU<di<	 	ELL 	U\\ 	r%   r   c                        e Zd ZdZ	 	 	 	 d	dedef fdZdej                  dej                  dej                  fdZ xZ	S )
AdaLayerNormContinuousa  
    Adaptive normalization layer with a norm layer (layer_norm or rms_norm).

    Args:
        embedding_dim (`int`): Embedding dimension to use during projection.
        conditioning_embedding_dim (`int`): Dimension of the input condition.
        elementwise_affine (`bool`, defaults to `True`):
            Boolean flag to denote if affine transformation should be applied.
        eps (`float`, defaults to 1e-5): Epsilon factor.
        bias (`bias`, defaults to `True`): Boolean flag to denote if bias should be use.
        norm_type (`str`, defaults to `"layer_norm"`):
            Normalization layer to use. Values supported: "layer_norm", "rms_norm".
    r   conditioning_embedding_dimc                    t         |           t        j                         | _        t        j
                  ||dz  |      | _        |dk(  rt        ||||      | _        y |dk(  rt        |||      | _        y t        d|       )Nr   rQ   rD   rms_normunknown norm_type )r   r   r   r   r   r   r   r    r!   r   rW   )r"   r   r   rT   rH   rG   rN   r#   s          r$   r   zAdaLayerNormContinuous.__init__C  s     	GGI	ii :MA<MTXY$!-6H$ODI*$s4FGDI1)=>>r%   r&   conditioning_embeddingr)   c                 
   | j                  | j                  |      j                  |j                              }t	        j
                  |dd      \  }}| j                  |      d|z   d d d d d f   z  |d d d d d f   z   }|S )Nr   r	   r+   )r   r   rI   rB   r9   r-   r!   )r"   r&   r   r   r/   r.   s         r$   r0   zAdaLayerNormContinuous.forward[  su    kk$))$:;>>qwwGH{{3q1uIIaLAIq$z22U1dA:5FFr%   )Tr1   TrD   )
r2   r3   r4   r5   r6   r   r9   r:   r0   r;   r<   s   @r$   r   r   4  sV    .  ?? %(?0 u|| PUP\P\ r%   r   c                        e Zd Z	 	 	 	 	 d	dededee   f fdZdej                  dej                  dej                  fdZ xZ	S )
LuminaLayerNormContinuousr   r   r   c                 \   t         |           t        j                         | _        t        j
                  |||      | _        |dk(  rt        ||||      | _        n'|dk(  rt        |||      | _        nt        d|       d | _        |t        j
                  |||      | _        y y )NrQ   rD   r   rH   rT   r   )r   r   r   r   r   r   linear_1r    r!   r   rW   linear_2)	r"   r   r   rT   rH   rG   rN   r   r#   s	           r$   r   z"LuminaLayerNormContinuous.__init__d  s     	 GGI			"<mRVW$!-6H$ODI*$3K]^DI1)=>>IImW4HDM r%   r&   r   r)   c                     | j                  | j                  |      j                  |j                              }|}| j	                  |      d|z   d d d d d f   z  }| j
                  | j                  |      }|S Nr	   )r   r   rI   rB   r!   r   )r"   r&   r   r   r/   s        r$   r0   z!LuminaLayerNormContinuous.forward  sn     mmDII&<=@@IJIIaLAIq$z22==$a Ar%   )Tr1   TrD   N)
r2   r3   r4   r6   r   r   r9   r:   r0   r;   r<   s   @r$   r   r   c  sk      !%II %(I #I@<< !& 
	r%   r   c                        e Zd ZdZdedef fdZ	 d
dej                  dej                  deej                     de	ej                  ej                  ej                  ej                  ej                  f   fd	Z
 xZS )%CogView3PlusAdaLayerNormZeroTextImagerk   r   r,   c                    t         |           t        j                         | _        t        j
                  |d|z  d      | _        t        j                  |dd      | _        t        j                  |dd      | _	        y )N   TrQ   Fr1   rS   )
r   r   r   r   r   r   r   r    norm_xnorm_c)r"   r   r,   r#   s      r$   r   z.CogView3PlusAdaLayerNormZeroTextImage.__init__  s[    GGI	iirCxdCll35dKll35dKr%   r&   contextr   r)   c                 B   | j                  | j                  |            }|j                  dd      \  }}}}}}	}
}}}}}| j                  |      }| j	                  |      }|d|d d d f   z   z  |d d d f   z   }|d|d d d f   z   z  |
d d d f   z   }|||||	|||||f
S )Nr   r	   r+   )r   r   r-   r   r   )r"   r&   r   r   r\   r]   r^   r_   r`   ra   c_shift_msac_scale_msa
c_gate_msac_shift_mlpc_scale_mlp
c_gate_mlpnormed_xnormed_contexts                     r$   r0   z-CogView3PlusAdaLayerNormZeroTextImage.forward  s     kk$))C.) IIbaI 	
;;q>W-Iag../)AtG2DD AAtG(<$<=AtG@TT(Iy(GZQ\^ikuuur%   rA   ry   r<   s   @r$   r   r     s    Lc L L '+	v<<v v ell#	v
 
u||U\\5<<u||S	Tvr%   r   c                        e Zd Z	 	 	 ddedededededdf fdZd	ej                  d
ej                  dej                  de	ej                  ej                  f   fdZ
 xZS )CogVideoXLayerNormZeroconditioning_dimr   rT   rH   rG   r)   Nc                     t         |           t        j                         | _        t        j
                  |d|z  |      | _        t        j                  |||      | _        y )Nrm   rQ   r   )	r   r   r   r   r   r   r   r    r!   )r"   r   r   rT   rH   rG   r#   s         r$   r   zCogVideoXLayerNormZero.__init__  sL     	GGI	ii 0!m2C$OLLCL^_	r%   rY   encoder_hidden_statesr(   c                 ^   | j                  | j                  |            j                  dd      \  }}}}}}	| j                  |      d|z   d d d d d f   z  |d d d d d f   z   }| j                  |      d|z   d d d d d f   z  |d d d d d f   z   }|||d d d d d f   |	d d d d d f   fS )Nrm   r	   r+   r[   )
r"   rY   r   r(   r.   r/   gate	enc_shift	enc_scaleenc_gates
             r$   r0   zCogVideoXLayerNormZero.forward  s     >B[[SW=Y=_=_`agh=_=i:udIy(		-0AIq$z3JJUSTVZ\]S]M^^ $		*? @A	MSTVZ\]S]C^ ^ajklnrtukuav v3T!T1*5ExPQSWYZPZG[[[r%   )Tr1   T)r2   r3   r4   r6   r7   r8   r   r9   r:   r   r0   r;   r<   s   @r$   r   r     s    
 $(`` ` !	`
 ` ` 
`\"\\\BG,,\V[VbVb\	u||U\\)	*\r%   r   z>=z2.1.0c                   8     e Zd ZdZddededef fdZd Z xZS )r    a  
        LayerNorm with the bias parameter.

        Args:
            dim (`int`): Dimensionality to use for the parameters.
            eps (`float`, defaults to 1e-5): Epsilon factor.
            elementwise_affine (`bool`, defaults to `True`):
                Boolean flag to denote if affine transformation should be applied.
            bias (`bias`, defaults to `True`): Boolean flag to denote if bias should be use.
        rH   rT   rG   c                    t         |           || _        t        |t        j
                        r|f}t        j                  |      | _        |ret        j                  t        j                  |            | _        |r.t        j                  t        j                  |            | _        y d | _        y d | _        d | _        y rA   )r   r   rH   
isinstancenumbersIntegralr9   Sizer,   r   	ParameteronesrF   zerosrG   r"   r,   rH   rT   rG   r#   s        r$   r   zLayerNorm.__init__  s    GDH#w//0fzz#DH! ll5::c?;>BBLLS)9:		" 	r%   c                     t        j                  || j                  | j                  | j                  | j
                        S rA   )rC   rD   r,   rF   rG   rH   )r"   inputs     r$   r0   zLayerNorm.forward  s)    <<txxdiiRRr%   )r1   TT	r2   r3   r4   r5   r8   r7   r   r0   r;   r<   s   @r$   r    r      s)    			!U 	!t 	!Z^ 	!"	Sr%   r    c                   8     e Zd ZdZddededef fdZd Z xZS )r   a  
    RMS Norm as introduced in https://huggingface.co/papers/1910.07467 by Zhang et al.

    Args:
        dim (`int`): Number of dimensions to use for `weights`. Only effective when `elementwise_affine` is True.
        eps (`float`): Small value to use when calculating the reciprocal of the square-root.
        elementwise_affine (`bool`, defaults to `True`):
            Boolean flag to denote if affine transformation should be applied.
        bias (`bool`, defaults to False): If also training the `bias` param.
    rH   rT   rG   c                    t         |           || _        || _        t	        |t
        j                        r|f}t        j                  |      | _	        d | _
        d | _        |r^t        j                  t        j                  |            | _
        |r.t        j                  t        j                  |            | _        y y y rA   )r   r   rH   rT   r   r   r   r9   r   r,   rF   rG   r   r   r   r   r   s        r$   r   zRMSNorm.__init__  s    "4c7++,&C::c?	,,uzz#7DKLLS)9:	  r%   c                    t               rdd l}| j                  [| j                  j                  t        j
                  t        j                  fv r%|j                  | j                  j                        }|j                  || j                  | j                        d   }| j                  || j                  z   }|S |j                  }|j                  t        j                        j                  d      j                  dd      }|t	        j                  || j                  z         z  }| j                  | j                  j                  t        j
                  t        j                  fv r%|j                  | j                  j                        }|| j                  z  }| j                  || j                  z   }|S |j                  |      }|S )Nr   )epsilonr   Tkeepdim)r   	torch_npurF   rB   r9   float16bfloat16rI   npu_rms_normrH   rG   float32powmeanrsqrt)r"   rY   r   input_dtypevariances        r$   r0   zRMSNorm.forward  sw   !#{{&;;$$(GG$1$4$4T[[5F5F$GM%22=$++W[W_W_2`abcMyy$ -		 9   (--K$''6::1=BB2tBTH)EKK4888K,LLM{{&;;$$(GG$1$4$4T[[5F5F$GM - ;99($1DII$=M  !. 0 0 =r%   )TFr   r<   s   @r$   r   r     s'    	; ;D ;t ;&r%   r   c                   0     e Zd Zddedef fdZd Z xZS )MochiRMSNormrH   rT   c                    t         |           || _        t        |t        j
                        r|f}t        j                  |      | _        |r.t        j                  t        j                  |            | _        y d | _        y rA   )r   r   rH   r   r   r   r9   r   r,   r   r   r   rF   )r"   r,   rH   rT   r#   s       r$   r   zMochiRMSNorm.__init__>  s]    c7++,&C::c?,,uzz#7DKDKr%   c                 >   |j                   }|j                  t        j                        j	                  d      j                  dd      }|t        j                  || j                  z         z  }| j                  || j                  z  }|j                  |      }|S )Nr   r   Tr   )	rB   rI   r9   r   r   r   r   rH   rF   )r"   rY   r   r   s       r$   r0   zMochiRMSNorm.forwardM  s    #)) ##EMM266q9>>r4>P%Htxx4G(HH;;")DKK7M%((5r%   )T)r2   r3   r4   r8   r7   r   r0   r;   r<   s   @r$   r   r   =  s     D 	r%   r   c                   (     e Zd ZdZ fdZd Z xZS )GlobalResponseNormz
    Global response normalization as introduced in ConvNeXt-v2 (https://huggingface.co/papers/2301.00808).

    Args:
        dim (`int`): Number of dimensions to use for the `gamma` and `beta`.
    c                     t         |           t        j                  t	        j
                  ddd|            | _        t        j                  t	        j
                  ddd|            | _        y r   )r   r   r   r   r9   r   gammabeta)r"   r,   r#   s     r$   r   zGlobalResponseNorm.__init__b  sL    \\%++aAs";<
LLQ1c!:;	r%   c                     t        j                  |ddd      }||j                  dd      dz   z  }| j                  ||z  z  | j                  z   |z   S )Nr   )r	   r   T)pr,   r   r   )r,   r   rR   )r9   r!   r   r   r   )r"   r&   gxnxs       r$   r0   zGlobalResponseNorm.forwardg  sS    ZZQFD9277r470478zzQV$tyy0144r%   )r2   r3   r4   r5   r   r0   r;   r<   s   @r$   r   r   Y  s    <
5r%   r   c                   f     e Zd Zddededef fdZdej                  dej                  fdZ xZ	S )	LpNormr   r,   rH   c                 L    t         |           || _        || _        || _        y rA   )r   r   r   r,   rH   )r"   r   r,   rH   r#   s       r$   r   zLpNorm.__init__n  s#    r%   rY   r)   c                 p    t        j                  || j                  | j                  | j                        S )N)r   r,   rH   )rC   	normalizer   r,   rH   )r"   rY   s     r$   r0   zLpNorm.forwardu  s#    {{=DFFdhhOOr%   )r   r   g-q=)
r2   r3   r4   r6   r8   r   r9   r:   r0   r;   r<   s   @r$   r   r   m  s;    #  u PU\\ Pell Pr%   r   rN   num_featuresrH   rT   rG   r)   c                     | dk(  rt        ||||      }|S | dk(  rt        j                  ||||      }|S | dk(  rt        j                  |||      }|S t	        d| d      )Nr   )rH   rT   rG   rD   
batch_norm)rH   affinez
norm_type=z is not supported.)r   r   r    BatchNorm2drW   )rN   r   rH   rT   rG   r!   s         r$   get_normalizationr   y  s     J|ASZ^_ K 
l	"||LcFX_cd
 K	 
l	"~~l<NO K JI<'9:;;r%   )r   Nr1   TT)*r   typingr   r   r   r9   torch.nnr   torch.nn.functional
functionalrC   utilsr   r   activationsr
   
embeddingsr   r   Moduler   r    r>   rM   rj   ru   r{   r   r   r   r   r   r   r   r   r   r   rh   r6   r8   r7   r   rK   r%   r$   <module>r      s     ( (     < ' b6299 6r	BLL 	gBII gD(;ryy (;VRYY @0		 0<L LD#299 #L,RYY ,^.		 .b*vBII *vZ\RYY \0 D'"ISBII SB9bii 9|299 85 5(	PRYY 	P ""&#3- 
 	
  YYr%   