
    ukiBj                       U d Z ddlmZ ddlmZ ddlZddlZddlmZmZm	Z	m
Z
 ddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ  ed      ZeZeZ eZ!eZ"eez  dz  Z#de$d<   eejJ                   G d de	                    Z&e	 	 d&	 	 	 	 	 	 	 d'd       Z'e	 	 d&	 	 	 	 	 	 	 d'd       Z(e	 d(	 	 	 d)d       Z)e	 	 d*	 	 	 d+d       Z*e	 	 d*	 	 	 d,d       Z+e	 	 	 	 d-	 	 	 	 	 	 	 d.d       Z,e	 	 	 d/	 	 	 	 	 	 	 d0d       Z-	 	 	 	 	 	 d1dZ.	 	 	 	 	 	 d2dZ/e	 	 	 	 d3	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d4d       Z0e	 	 	 	 d3	 	 	 	 	 	 	 d5d       Z1e1Z2e	 	 	 	 d3	 	 	 	 	 	 	 d5d       Z3e3Z4e	 	 	 	 d3	 	 	 	 	 	 	 d5d        Z5e	 	 	 	 d3	 	 	 	 	 	 	 d5d!       Z6e	 	 	 	 d3	 	 	 	 	 	 	 d5d"       Z7e7Z8e	 	 	 	 d3	 	 	 	 	 	 	 d5d#       Z9e9Z:e	 	 	 d6	 	 	 	 	 d7d$       Z;e	 	 	 d6	 	 	 	 	 	 	 d7d%       Z<y)8za
Common neural network layer initializers, consistent with definitions
used in Keras and Sonnet.
    )annotations)SequenceN)AnyLiteralProtocol	TypeAlias)core)dtypes)numpy)random)NamedSharding)PartitionSpec)canonicalize_sharding)Array	ArrayLikeDType)
set_modulezjax.nn.initializersr   OutShardingTypec                  0    e Zd ZdZ	 	 d	 	 	 	 	 	 	 	 	 ddZy)InitializerzFProtocol for initializers returned by :mod:`jax.nn.initializers` APIs.Nc                    t         N)NotImplementedError)selfkeyshapedtypeout_shardings        S/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/_src/nn/initializers.py__call__zInitializer.__call__5   s
    
     NN
r   r   r   z
core.Shaper   DTypeLikeInexact | Noner   r   returnr   )__name__
__module____qualname____doc__r     r!   r   r   r   1   sC     O 15/3	  . -	 9>	r!   r   c                b    |t        j                         n|}t        j                  |||      S )a  An initializer that returns a constant array full of zeros.

  The ``key`` argument is ignored.

  >>> import jax, jax.numpy as jnp
  >>> jax.nn.initializers.zeros(jax.random.key(42), (2, 3), jnp.float32)
  Array([[0., 0., 0.],
         [0., 0., 0.]], dtype=float32)
  r   )r
   default_float_dtypejnpzerosr   r   r   r   s       r   r/   r/   <   s+     +0-&
$
$
&U%	5%l	;;r!   c                b    |t        j                         n|}t        j                  |||      S )a  An initializer that returns a constant array full of ones.

  The ``key`` argument is ignored.

  >>> import jax, jax.numpy as jnp
  >>> jax.nn.initializers.ones(jax.random.key(42), (3, 2), jnp.float32)
  Array([[1., 1.],
         [1., 1.],
         [1., 1.]], dtype=float32)
  r,   )r
   r-   r.   onesr0   s       r   r2   r2   M   s+     +0-&
$
$
&U%	%\	::r!   c                (     |df	 	 	 	 	 	 	 d fd}|S )a  Builds an initializer that returns arrays full of a constant ``value``.

  Args:
    value: the constant value with which to fill the initializer.
    dtype: optional; the initializer's default dtype.

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.constant(-7)
  >>> initializer(jax.random.key(42), (2, 3), jnp.float32)
  Array([[-7., -7., -7.],
         [-7., -7., -7.]], dtype=float32)
  Nc                ~    |t        j                         n|}t        |d      }t        j                  |||      S )Nznn.initializers.constant)r   device)r
   r-   r   r.   full)r   r   r   r   values       r   initzconstant.<locals>.initn   s;     -2MF&&(uE(7QRL88E5lCCr!   r#   r*   )r7   r   r8   s   `  r   constantr9   _   s@    " -2+/DD)D )D 5:D 
+r!   c                (     |df	 	 	 	 	 	 	 d fd}|S )aY  Builds an initializer that returns real uniformly-distributed random arrays.

  Args:
    scale: optional; the upper bound of the random distribution.
    dtype: optional; the initializer's default dtype.

  Returns:
    An initializer that returns arrays whose values are uniformly distributed in
    the range ``[0, scale)``.

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.uniform(10.0)
  >>> initializer(jax.random.key(42), (2, 3), jnp.float32)  # doctest: +SKIP
  Array([[7.298188 , 8.691938 , 8.7230015],
         [2.0818567, 1.8662417, 5.5022564]], dtype=float32)
  Nc                    |t        j                         n|}t        j                  | |||      t	        j
                  |      z  S Nr,   )r
   r-   r   uniformr.   array)r   r   r   r   scales       r   r8   zuniform.<locals>.init   sH     -2MF&&(uE>>#ue'357:yy7NO Or!   r#   r*   )r?   r   r8   s   `  r   r=   r=   w   @    * -2+/OO)O )O 5:O 
+r!   c                (     |df	 	 	 	 	 	 	 d fd}|S )ax  Builds an initializer that returns real normally-distributed random arrays.

  Args:
    stddev: optional; the standard deviation of the distribution.
    dtype: optional; the initializer's default dtype.

  Returns:
    An initializer that returns arrays whose values are normally distributed
    with mean ``0`` and standard deviation ``stddev``.

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.normal(5.0)
  >>> initializer(jax.random.key(42), (2, 3), jnp.float32)  # doctest: +SKIP
  Array([[ 3.0613258 ,  5.6129413 ,  5.6866574 ],
         [-4.063663  , -4.4520254 ,  0.63115686]], dtype=float32)
  Nc                    |t        j                         n|}t        j                  | |||      t	        j
                  |      z  S r<   )r
   r-   r   normalr.   r>   )r   r   r   r   stddevs       r   r8   znormal.<locals>.init   sH     -2MF&&(uE==eU&2469ii6NO Or!   r#   r*   )rD   r   r8   s   `  r   rC   rC      r@   r!   c                0     |df	 	 	 	 	 	 	 d fd}|S )a  Builds an initializer that returns truncated-normal random arrays.

  Args:
    stddev: optional; the standard deviation of the untruncated distribution.
      Note that this function does not apply the stddev correction as is done in
      the variancescaling initializers, and users are expected to apply this
      correction themselves via the stddev arg if they wish to employ it.
    dtype: optional; the initializer's default dtype.
    lower: Float representing the lower bound for truncation. Applied before
      the output is multiplied by the stddev.
    upper: Float representing the upper bound for truncation. Applied before
      the output is multiplied by the stddev.

  Returns:
    An initializer that returns arrays whose values follow the truncated normal
    distribution with mean ``0`` and standard deviation ``stddev``, and range
    :math:`\rm{lower * stddev} < x < \rm{upper * stddev}`.

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.truncated_normal(5.0)
  >>> initializer(jax.random.key(42), (2, 3), jnp.float32)  # doctest: +SKIP
  Array([[ 2.9047365,  5.2338114,  5.29852  ],
         [-3.836303 , -4.192359 ,  0.6022964]], dtype=float32)
  Nc                    |t        j                         n|}t        j                  | |||      t	        j
                  |      z  S r<   )r
   r-   r   truncated_normalr.   r>   )r   r   r   r   lowerrD   uppers       r   r8   ztruncated_normal.<locals>.init   sM     -2MF&&(uE""UE5%!#%(YYvu%=> >r!   r#   r*   )rD   r   rH   rI   r8   s   ` `` r   rG   rG      s;    > -2+/>>)> )> 5:> 
+r!   c                V   t        |t              r+|dk(  r&t        |       dk  rt        dt        |        d      t        |t              r| |   }n&t	        j
                  |D cg c]  }| |   	 c}      }t        |t              r| |   }n&t	        j
                  |D cg c]  }| |   	 c}      }t        |t              r| |   }n&t	        j
                  |D cg c]  }| |   	 c}      }t	        j
                  |       |z  |z  |z  }||z  }	||z  }
|	|
fS c c}w c c}w c c}w )z
  Compute effective input and output sizes for a linear or convolutional layer.

  Axes not in in_axis, out_axis, or batch_axis are assumed to constitute the
  "receptive field" of a convolution (kernel spatial dimensions).
     z*Can't compute input and output sizes of a zd-dimensional weights tensor with default in_axis. Must be at least 2D or specify in_axis explicitly.)
isinstanceintlen
ValueErrormathprod)r   in_axisout_axis
batch_axisin_sizeiout_size
batch_sizereceptive_field_sizefan_infan_outs              r   _compute_fansr]      s!    'R-CJ!O

4SZL A 	  GnGii73aq34G#XHyyH5q%(56H
C z"Jj9E!H9:J5)G3h>K))&++'	 4 6 :s   %D!D!D&c                   t        j                  |       \  }}t        j                  d|      j                  j
                  }t        j                  |      }t        j                  dt        j                  |||      z        j                  |      }dt        j                  z  t        j                  |||      j                  |      z  }|t        j                  d|z        z  S )zj
  Sample uniform random values within a disk on the complex plane,
  with zero mean and unit variance.
  r                    ?)r   splitnpr>   realr   r
   to_complex_dtyper.   sqrtr=   astypepiexp)r   r   r   key_r	key_theta
real_dtyperthetas           r   _complex_uniformrn      s     \\#&%xx5!&&,,*

!
!*
-%	hhq6>>%
;;<CCEJ!
bee)fnnYzBII%P
P%	
SWWR%Z 	  r!   c                `   t        j                  |       \  }}t        j                  d|      j                  j
                  }t        j                  |      }dt        j                  t        j                  |dz   |            z
  t        j                  |||      j                  |      z  }t        j                  t        j                  d|z
               }dt        j                  z  t        j                  |||      j                  |      z  }	|t        j                  d|	z        z  S )z
  Sample random values from a centered normal distribution on the complex plane,
  whose modulus is truncated to `upper`, and the variance before the truncation
  is one.
  r   rL   r_   r`   )r   ra   rb   r>   rc   r   r
   rd   r.   rh   r=   rf   re   logrg   )
r   rI   r   r   ri   rj   rk   trl   rm   s
             r   _complex_truncated_normalrr     s     \\#&%xx5!&&,,*

!
!*
-%	CGGCII
mU344	uj	1	8	8	?@!	hhA!
bee)fnnYzBII%P
P%	
SWWR%Z 	  r!   c                <     |df	 	 	 	 	 	 	 d fd}|S )a#  
  Initializer that adapts its scale to the shape of the weights tensor.

  With ``distribution="truncated_normal"`` or ``distribution="normal"``, samples
  are drawn from a (truncated) normal distribution with a mean of zero
  and a standard deviation (after truncation, if applicable) of
  :math:`\sqrt{\frac{scale}{n}}`, where `n` is, for each ``mode``:

  * ``"fan_in"``: the number of inputs
  * ``"fan_out"``: the number of outputs
  * ``"fan_avg"``: the arithmetic average of the numbers of inputs and outputs
  * ``"fan_geo_avg"``: the geometric average of the numbers of inputs and outputs

  This initializer can be configured with ``in_axis``, ``out_axis``, and
  ``batch_axis`` to work with general convolutional or dense layers; axes that
  are not in any of those arguments are assumed to be the "receptive field"
  (convolution kernel spatial axes).

  With ``distribution="truncated_normal"``, the absolute values of the samples
  are truncated at 2 standard deviations before scaling.

  With ``distribution="uniform"``, samples are drawn from:

  * a uniform interval, if `dtype` is real, or
  * a uniform disk, if `dtype` is complex,

  with a mean of zero and a standard deviation of :math:`\sqrt{\frac{scale}{n}}`
  where `n` is defined above.

  Args:
    scale: scaling factor (positive float).
    mode: one of ``"fan_in"``, ``"fan_out"``, ``"fan_avg"``, and ``"fan_geo_avg"``.
    distribution: random distribution to use. One of ``"truncated_normal"``,
      ``"normal"`` and ``"uniform"``.
    in_axis: axis or sequence of axes of the input dimension in the weights
      array.
    out_axis: axis or sequence of axes of the output dimension in the weights
      array.
    batch_axis: axis or sequence of axes in the weight array that should be
      ignored.
    dtype: the dtype of the weights.
  Nc                
   t        j                  |      }|t        j                         n|}t	        |	      \  }}dk(  r|}n2dk(  r|}n*dk(  r	||z   dz  }ndk(  r	||z  dz  }nt        d       t        j                  |z  |      }
d	k(  rt        j                  |t        j                        rJt        j                  |      t        j                  d
|      z  }t        j                  | dd|||      |z  S t        j                  |      t        j                  d|      z  }t        | d||      |z  S 
dk(  r/t        j                  | |||      t        j                  |      z  S 
dk(  rzt        j                  |t        j                        r3t        j                   | ||d|      t        j                  d|z        z  S t#        | ||      t        j                  |      z  S t        d
       )Nr[   r\   fan_avgr_   fan_geo_avgg      ?z/invalid mode for variance scaling initializer: r   rG   g۶%?rK   r,   gVr?rC   r=      z7invalid distribution for variance scaling initializer: )r	   canonicalize_shaper
   r-   r]   rP   r.   r>   
issubdtyperb   floatingre   r   rG   rr   rC   r=   rn   )r   r   r   r   r[   r\   denominatorvariancerD   rU   distributionrS   moderT   r?   s            r   r8   zvariance_scaling.<locals>.initN  s    ##E*E,1MF&&(uE#E7HjIOFGxv		'K		6G+;q*@K		v/?C.G
9$@B Byy,E:H))			5"++	.(#cii0BE&JJ&&sB5%4@BDJK 	K (#cii0BE&JJ(a>GG		!]]3u(468;8JK K		"			5"++	.~~c5%+79;>88AL;QR 	R  UE2SXXh5GGGPQ]P^_``r!   r#   r*   )r?   r   r   rS   rT   rU   r   r8   s   ``````  r   variance_scalingr     sH    n -2+/$a$a)$a )$a 5:$a $aL 
+r!   c           	     &    t        ddd| |||      S )a  Builds a Glorot uniform initializer (aka Xavier uniform initializer).

  A `Glorot uniform initializer`_ is a specialization of
  :func:`jax.nn.initializers.variance_scaling` where ``scale = 1.0``,
  ``mode="fan_avg"``, and ``distribution="uniform"``.

  Args:
    in_axis: axis or sequence of axes of the input dimension in the weights
      array.
    out_axis: axis or sequence of axes of the output dimension in the weights
      array.
    batch_axis: axis or sequence of axes in the weight array that should be
      ignored.
    dtype: the dtype of the weights.

  Returns:
    An initializer.

  Examples:

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.glorot_uniform()
  >>> initializer(jax.random.key(42), (2, 3), jnp.float32)  # doctest: +SKIP
  Array([[ 0.50350785,  0.8088631 ,  0.81566876],
         [-0.6393332 , -0.6865721 ,  0.11003882]], dtype=float32)

  .. _Glorot uniform initializer: http://proceedings.mlr.press/v9/glorot10a.html
        ?ru   r=   rS   rT   rU   r   r   r   s       r   glorot_uniformr   v  s#    B 
#y)W#+
%
Q Qr!   c           	     &    t        ddd| |||      S )a  Builds a Glorot normal initializer (aka Xavier normal initializer).

  A `Glorot normal initializer`_ is a specialization of
  :func:`jax.nn.initializers.variance_scaling` where ``scale = 1.0``,
  ``mode="fan_avg"``, and ``distribution="truncated_normal"``.

  Args:
    in_axis: axis or sequence of axes of the input dimension in the weights
      array.
    out_axis: axis or sequence of axes of the output dimension in the weights
      array.
    batch_axis: axis or sequence of axes in the weight array that should be
      ignored.
    dtype: the dtype of the weights.

  Returns:
    An initializer.

  Examples:

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.glorot_normal()
  >>> initializer(jax.random.key(42), (2, 3), jnp.float32)  # doctest: +SKIP
  Array([[ 0.41770416,  0.75262755,  0.7619329 ],
         [-0.5516644 , -0.6028657 ,  0.08661086]], dtype=float32)

  .. _Glorot normal initializer: http://proceedings.mlr.press/v9/glorot10a.html
  r   ru   rG   r   r   r   s       r   glorot_normalr     s$    B 
#y*<g#+
%
Q Qr!   c           	     &    t        ddd| |||      S )a  Builds a Lecun uniform initializer.

  A `Lecun uniform initializer`_ is a specialization of
  :func:`jax.nn.initializers.variance_scaling` where ``scale = 1.0``,
  ``mode="fan_in"``, and ``distribution="uniform"``.

  Args:
    in_axis: axis or sequence of axes of the input dimension in the weights
      array.
    out_axis: axis or sequence of axes of the output dimension in the weights
      array.
    batch_axis: axis or sequence of axes in the weight array that should be
      ignored.
    dtype: the dtype of the weights.

  Returns:
    An initializer.

  Examples:

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.lecun_uniform()
  >>> initializer(jax.random.key(42), (2, 3), jnp.float32)  # doctest: +SKIP
  Array([[ 0.56293887,  0.90433645,  0.9119454 ],
         [-0.71479625, -0.7676109 ,  0.12302713]], dtype=float32)

  .. _Lecun uniform initializer: https://arxiv.org/abs/1706.02515
  r   r[   r=   r   r   r   s       r   lecun_uniformr     #    B 
#xG#+
%
Q Qr!   c           	     &    t        ddd| |||      S )a  Builds a Lecun normal initializer.

  A `Lecun normal initializer`_ is a specialization of
  :func:`jax.nn.initializers.variance_scaling` where ``scale = 1.0``,
  ``mode="fan_in"``, and ``distribution="truncated_normal"``.

  Args:
    in_axis: axis or sequence of axes of the input dimension in the weights
      array.
    out_axis: axis or sequence of axes of the output dimension in the weights
      array.
    batch_axis: axis or sequence of axes in the weight array that should be
      ignored.
    dtype: the dtype of the weights.

  Returns:
    An initializer.

  Examples:

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.lecun_normal()
  >>> initializer(jax.random.key(42), (2, 3), jnp.float32)  # doctest: +SKIP
  Array([[ 0.46700746,  0.8414632 ,  0.8518669 ],
         [-0.61677957, -0.67402434,  0.09683388]], dtype=float32)

  .. _Lecun normal initializer: https://arxiv.org/abs/1706.02515
  r   r[   rG   r   r   r   s       r   lecun_normalr     $    B 
#x);W#+
%
Q Qr!   c           	     &    t        ddd| |||      S )a  Builds a He uniform initializer (aka Kaiming uniform initializer).

  A `He uniform initializer`_ is a specialization of
  :func:`jax.nn.initializers.variance_scaling` where ``scale = 2.0``,
  ``mode="fan_in"``, and ``distribution="uniform"``.

  Args:
    in_axis: axis or sequence of axes of the input dimension in the weights
      array.
    out_axis: axis or sequence of axes of the output dimension in the weights
      array.
    batch_axis: axis or sequence of axes in the weight array that should be
      ignored.
    dtype: the dtype of the weights.

  Returns:
    An initializer.

  Examples:

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.he_uniform()
  >>> initializer(jax.random.key(42), (2, 3), jnp.float32)  # doctest: +SKIP
  Array([[ 0.79611576,  1.2789248 ,  1.2896855 ],
         [-1.0108745 , -1.0855657 ,  0.17398663]], dtype=float32)

  .. _He uniform initializer: https://arxiv.org/abs/1502.01852
         @r[   r=   r   r   r   s       r   
he_uniformr   
  r   r!   c           	     &    t        ddd| |||      S )a  Builds a He normal initializer (aka Kaiming normal initializer).

  A `He normal initializer`_ is a specialization of
  :func:`jax.nn.initializers.variance_scaling` where ``scale = 2.0``,
  ``mode="fan_in"``, and ``distribution="truncated_normal"``.

  Args:
    in_axis: axis or sequence of axes of the input dimension in the weights
      array.
    out_axis: axis or sequence of axes of the output dimension in the weights
      array.
    batch_axis: axis or sequence of axes in the weight array that should be
      ignored.
    dtype: the dtype of the weights.

  Returns:
    An initializer.

  Examples:

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.he_normal()
  >>> initializer(jax.random.key(42), (2, 3), jnp.float32)  # doctest: +SKIP
  Array([[ 0.6604483 ,  1.1900088 ,  1.2047218 ],
         [-0.87225807, -0.95321447,  0.1369438 ]], dtype=float32)

  .. _He normal initializer: https://arxiv.org/abs/1502.01852
  r   r[   rG   r   r   r   s       r   	he_normalr   0  r   r!   c                ,     |df	 	 	 	 	 	 	 d fd}|S )a  
  Builds an initializer that returns uniformly distributed orthogonal matrices.

  If the shape is not square, the matrices will have orthonormal rows or columns
  depending on which side is smaller.

  Args:
    scale: the upper bound of the uniform distribution.
    column_axis: the axis that contains the columns that should be orthogonal.
    dtype: the default dtype of the weights.

  Returns:
    An orthogonal initializer.

  Examples:

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.orthogonal()
  >>> initializer(jax.random.key(42), (2, 3), jnp.float32)  # doctest: +SKIP
  Array([[ 3.9026976e-01,  7.2495741e-01, -5.6756169e-01],
         [ 8.8047469e-01, -4.7409311e-01, -1.3157725e-04]],            dtype=float32)
  Nc           	        |t         |t        j                         n|}t        |      dk  rt	        d      t        j                  |      |   z  |   }}t        j                  | |d||      }t        j                  |t        t        j                  |            |   fz         }t        j                  |d      }t        j                  |      |z  S )Nr_   z3orthogonal initializer requires at least a 2D shaper*   rx   )r   r
   r-   rO   rP   rQ   rR   r   
orthogonalr.   reshapetuplerb   deletemoveaxisr>   )	r   r   r   r   n_rowsn_colsQcolumn_axisr?   s	          r   r8   zorthogonal.<locals>.initp  s     ,1MF&&(uE
5zA~LMMYYu%{);;U;=OFF#vr5&9AAuRYYuk:;u[?Q>SSTAQK(A99UE"Q&&r!   r#   r*   r?   r   r   r8   s   ``  r   r   r   V  s;    8 -2+/'')' )' 5:' 
+r!   c                ,     |df	 	 	 	 	 	 	 d fd}|S )ae  
  Builds an initializer for delta orthogonal kernels.

  Args:
    scale: the upper bound of the uniform distribution.
    column_axis: the axis that contains the columns that should be orthogonal.
    dtype: the default dtype of the weights.

  Returns:
    A `delta orthogonal initializer`_. The shape passed to the initializer must
    be 3D, 4D, or 5D.

  Examples:

  >>> import jax, jax.numpy as jnp
  >>> initializer = jax.nn.initializers.delta_orthogonal()
  >>> initializer(jax.random.key(42), (3, 3, 3), jnp.float32)  # doctest: +SKIP
  Array([[[ 0.        ,  0.        ,  0.        ],
          [ 0.        ,  0.        ,  0.        ],
          [ 0.        ,  0.        ,  0.        ]],
  <BLANKLINE>
         [[ 0.27858758, -0.7949833 , -0.53887904],
          [ 0.9120717 ,  0.04322892,  0.40774566],
          [-0.30085585, -0.6050892 ,  0.73712474]],
  <BLANKLINE>
         [[ 0.        ,  0.        ,  0.        ],
          [ 0.        ,  0.        ,  0.        ],
          [ 0.        ,  0.        ,  0.        ]]], dtype=float32)


  .. _delta orthogonal initializer: https://arxiv.org/abs/1806.05393
  Nc                t   |t         |t        j                         n|}t        |      dvrt	        d      |d   |d   k  rt	        d      t        |      } || |dd        }t        j                  ||      }t        |      dk(  r+|d	   }|j                  |d
z
  dz  df   j                  |      S t        |      dk(  r5|d d \  }}	|j                  |d
z
  dz  |	d
z
  dz  df   j                  |      S |d d \  }}	}
|j                  |d
z
  dz  |	d
z
  dz  |
d
z
  dz  df   j                  |      S )N)ry         z;Delta orthogonal initializer requires a 3D, 4D or 5D shape.rx   rK   z/`fan_in` must be less or equal than `fan_out`. )r?   r   r   rw   ry   r   rL   r_   .r   )
r   r
   r-   rO   rP   r   r.   r/   atset)r   r   r   r   
ortho_initortho_matrixWkk1k2k3r   r?   s              r   r8   zdelta_orthogonal.<locals>.init  s`    ,1MF&&(uE
5z"   ! !Ry59HII%[NJc5:.L		%u%A
5zQ
(aTT1Q3(C- $$\22	UqRayfb"TT2a4!)bdQY+,00>>!9jb"bTT2a4!)bdQYA	367;;LIIr!   r#   r*   r   s   ``  r   delta_orthogonalr     sA    N -2+/JJ)J )J 5:J0 
+r!   r"   r#   r   )r7   r   r   r$   r%   r   ){Gz?N)r?   RealNumericr   r$   r%   r   )rD   r   r   r$   r%   r   )r   Ng       r   )
rD   r   r   r$   rH   r   rI   r   r%   r   )rK   rx   r*   )
r   Sequence[int]rS   int | Sequence[int]rT   r   rU   r   r%   ztuple[float, float])r   r   r   r   r   r   r%   r   )
r   r   rI   r   r   r   r   r   r%   r   )rK   rx   r*   N)r?   r   r   zTLiteral['fan_in'] | Literal['fan_out'] | Literal['fan_avg'] | Literal['fan_geo_avg']r   zDLiteral['truncated_normal'] | Literal['normal'] | Literal['uniform']rS   r   rT   r   rU   r   r   r$   r%   r   )
rS   r   rT   r   rU   r   r   r$   r%   r   )r   rx   N)r?   r   r   rN   r   r$   r%   r   )=r)   
__future__r   collections.abcr   rQ   typingr   r   r   r   r   rb   jax._srcr	   r
   r.   r   jax._src.named_shardingr   jax._src.partition_specr   jax._src.sharding_implsr   jax._src.typingr   r   r   jax._src.utilr   exportDTypeLikeFloatDTypeLikeComplexDTypeLikeInexactr   r   __annotations__runtime_checkabler   r/   r2   r9   r=   rC   rG   r]   rn   rr   r   r   xavier_uniformr   xavier_normalr   r   r   kaiming_uniformr   kaiming_normalr   r   r*   r!   r   <module>r      s%  
 # $   4 4    !  1 1 9 3 3 $	)	*   *]:TA A(     ,0*.<<(< (< 49< <   +/)-;;'; '; 38; ;" .2+7B . !%-1*6A 6 !%,0)5@ 6 +/6:*.*-$3$'$ ($ 3>$ $L 132446!.!/! 2! +	! !F!)!!!&+!!%2!%*!/4!"  "$"$$&#'Z	Z\Z)Z
 Z  Z "Z 
!Z Z Zx 24355748!Q0!Q2!Q 2!Q >I!Q !QF  13244637!Q/!Q1!Q 1!Q =H!Q !QF 13244637!Q/!Q1!Q 1!Q =H!Q !QF 02133526!Q.!Q0!Q 0!Q <G!Q !QF .0/11304!Q,!Q.!Q .!Q :E!Q !QF -/.002/3!Q+!Q-!Q -!Q 9D!Q !QF $'"$04''-'9D' 'R #'<	<< 
!< -8< <r!   