
    ukiQ%                       d Z ddlmZ ddlZddlZddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZ dd	lmZ dd
lmZ dd
lmZ ddZddZddZd Zd Zd Z ej0                  ej2                  d      ddddd	 	 	 	 	 	 	 dd       Zy)u0  A JIT-compatible library for QDWH-based polar decomposition.

QDWH is short for QR-based dynamically weighted Halley iteration. The Halley
iteration implemented through QR decmopositions does not require matrix
inversion. This is desirable for multicore and heterogeneous computing systems.

Reference: Nakatsukasa, Yuji, Zhaojun Bai, and François Gygi.
"Optimizing Halley's iteration for computing the matrix polar decomposition."
SIAM Journal on Matrix Analysis and Applications 31, no. 5 (2010): 2700-2720.
https://epubs.siam.org/doi/abs/10.1137/090774999
    )annotationsN)api)config)core)dtypes)lax)numpy)linalgc                $   t        j                  |       t        |      k(  sJ d}t        |      D ]C  \  }}|	t	        j
                  t         j                  | j                  |      |k  }||n||z  }E || S t        j                  || |      S )zMasks `x` up to the dynamic shape `dims`.

  Replaces values outside those dimensions with `alternative`. `alternative` is
  broadcast with `x`.
  N)
npndimlen	enumerater   broadcasted_iotaint32shapejnpwhere)xdimsalternativemaskid
mask_dim_is          S/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/_src/tpu/linalg/qdwh.py_maskr   ,   s     
s4y	  	 	$o Ada}''!''1=Aj<ZdZ.?dA l?		$; ??    c                    dg| j                   z  }|||f||<   t        j                  | t        j                  || j
                        |      S )N)r   r   r   )r   r   padr   arraydtype)r   lowhighinterior
fill_valueaxispadss          r   _pad_in_dimr)   :   sD    
qvv	$T8$$t*	CIIj!''2D	99r   c                    |t        j                  | |g|      S t        j                  t	        | |j
                  |   |      |||      S )zIConcatenates padded arrays `a` and `b` where the true size of `a` is `m`.r'   )r$   r'   )r   concatenater   dynamic_update_slice_in_dimr)   r   )abmr'   s       r   _dynamic_concatr1   ?   sK    Y??Aq6--		(	(!!''$-d3Q4
A Ar   c           	        |\  }}}| j                   \  }}t        || z  t        j                  |t	        j
                  |             |      }	t        j                  |	d      \  }
}t        t        j                  |
d||f      ||f      }t        j                  |
||d      }t        |||f      j                  j                         }|| z  |||z  z  z   S )zQDWH iteration using QR decomposition.

  Args:
  u: a matrix, with static (padded) shape M x N.
  m, n: the dynamic shape of the matrix, where m <= M and n <= N.
  params: the QDWH parameters.
  r"   F)full_matrices)r   r   r   r+   )r   r1   r   eyer   r"   
lax_linalgqrr   r   slicedynamic_slice_in_dimTconj)ur0   nparamsa_minus_e_by_sqrt_csqrt_ceMNyq_q1q2s                 r   _use_qrrI   G   s     $* vq	
$!Qfqj#''!6<<?"CQG!	q	.$!QSYYq&1a&)Aq62"
1aa0"R!Q!"	
Q$R0	00r   c                ,   |\  }}}| j                   \  }}|| j                  j                         | z  z  t        j                  |t        j                  |             z   }	t        |	||ft        j                  ||	j                              }	t        j                  |	d      }
t        j                  |
| j                  ddd      j                         }t        j                  |
|dddd      j                  j                         }|| z  ||z  z   S )zQDWH iteration using Cholesky decomposition.

  Args:
  u: a matrix, with static (padded) shape M x N
  m, n: the dynamic shape of the matrix, where m <= M and n <= N.
  params: the QDWH parameters.
  r3   F)symmetrize_inputT)	left_sidelowerconjugate_a)rL   rM   transpose_arN   )r   r:   r;   r   r5   r   r"   r   r6   choleskytriangular_solve)r<   r0   r=   r>   	a_minus_ecrA   rF   rC   r   rD   zs               r   _use_choleskyrU   \   s     /)Q	
$!Q13388:>SWWQfll1o>>! A1vswwq01! !e4!!!Dd<<@DF  !!!Q$d.2FFGa  
QQ	r   c                f     |2t        t        j                   j                        j                        }t        j                   d      }t        j                   t        j                        }t        j                  |      t        j                  |      z  }t        j                  |dk(  d|      } |j                   j                        z  }|}	d|z  dz  }
t        j                  |
       d }d d}g }g }d}|	|
z   dk  r|k  r|dz  }|	|	z  }d	d|z  dz
  z  |z  d
z  }d|z   dz  }|d|z
  dd|z
  z  ||z  z  z   dz  z   }|dz
  dz  d	z  }||z   dz
  }|	|||z  z   z  d||z  z   z  }	||kD  r|j                   ||||             n|j                   |||             |	|
z   dk  r|k  r fd fd} |||t         d      \  }} |||t"        d      \  }}fd}fd}t%        |      t%        |      z   }t        j&                  |||||f      \  }}}d|z  d|z  |j(                  j+                         |z  z  z
  }|j(                  j+                          z  }||j(                  j+                         z   dz  }t        j,                  |      }||||fS )zGQR-based dynamically weighted Halley iteration for polar decomposition.   )ordr   g      $@g       @c                0    ||z  }| |z
  }|dz  }||z  ||fS )N      ? )r.   r/   rS   rA   rR   r@   s         r   get_qr_paramsz_qdwh.<locals>.get_qr_params   s/    	AAAI5\F**r   c                     ||z  }| |z
  }|||fS Nr[   )r.   r/   rS   rA   rR   s        r   get_chol_paramsz_qdwh.<locals>.get_chol_params   s!    	AAAIq!r   d      gUUUUUU?g      ?rZ      c                    |\  }}| 
ddd      }nt        j                  || d      }|} |||      }d}	|rt        j                  ||z
        kD  }	||	fS )N   rW   F)keepdimsT)r   dynamic_index_in_dim
jnp_linalgnorm)kstate	update_fncoefstest_convergencer<   rF   r>   u_previs_not_convergedr_   r0   r=   tol_norms             r   	iterationz_qdwh.<locals>.iteration   su    DAq}q!Q'f''q5AfF!Q6"A#V4x?r   c                    |s| dfS t        j                  |      j                  j                        }t	        j
                  fd|i|}t        j                  dt        |      || df      S )NTrl   r   )	r   r!   astyper"   	functoolspartialr   	fori_loopr   )r<   rl   kwargsbodyrq   r   s       r   iteratez_qdwh.<locals>.iterate   sc    WnIIe##AGG,EY>e>v>D==CJq$i88r   Frl   rk   rm   Tc                B    | \  }}}t        j                  ||k        S r^   )r   logical_and)rj   ri   rF   ro   max_iterationss       r   cond_funz_qdwh.<locals>.cond_fun   s'    "Aq
??+Q-?@@r   c                L    | \  }}} |||fd t         d      \  }}|dz   ||fS )NTrz   rW   )rU   )rj   ri   r<   ro   rq   s       r   body_funz_qdwh.<locals>.body_fun   sI    "Aq
#		
A q5!%%%r   g      ?)floatr   finfor"   epsrg   rh   r   infr   rsqrtr   r   rs   cbrtappendrI   rU   r   
while_loopr:   r;   logical_not)!r   r0   r=   r}   r   one_norminf_normalpha_inverser<   ltol_lr\   CHOLESKY_CUTOFFqr_coefs
chol_coefsri   l2ddsqdr.   r/   rS   ry   rF   ro   r~   r   	num_itershis_convergedr_   rq   rp   s!   ````                          @@@r   _qdwhr   x   s    	[
QWW%))
*C__QA&(__QBFF+())H%		((;;-))HM1m<--

qww
''!	! *s
%XXe_(+
 /(*!	E	A!n,FA	
QB
q2vz
R
U	+B8
Cq2vQVS11u==A	
Q1qA	A	A	QRZABJ'A?oomAq!,-1a01 	
E	A!n,"9 
x7U
$!Q  z]T!A	& 
(mc*o%!#&>>1a!12$ )Q 
 
Aga13388:>**!cchhj1n!13388:~! !12,	
Ay,	&&r   )is_hermitianr}   r   )static_argnamesF)r   r}   r   dynamic_shapec               r   t        j                  t        |d      }|d}nt        j                  t        |d      }| j                  \  }}||k  rt        d      ||\  }}t        | ||f      } n||}}t        j                  d      5  t        | ||||      \  }	}
}}ddd       	
fS # 1 sw Y   xY w)a  QR-based dynamically weighted Halley iteration for polar decomposition.

  Args:
    x: A full-rank matrix, with shape `M x N`. The matrix may be padded up to
      that size from a smaller true shape (``dynamic_shape``).
    is_hermitian: True if `x` is Hermitian. Default to `False`. This parameter
      is currently unused, but exists for backward compatibility.
    eps: The final result will satisfy ``|x_k - x_k-1| < |x_k| *
      (4*eps)**(1/3)`` where `x_k` is the iterate.
    max_iterations: Iterations will terminate after this many steps even if the
      above is unsatisfied.
    dynamic_shape: the unpadded shape as an ``(m, n)`` tuple; optional.

  Returns:
    A four-tuple of (u, h, num_iters, is_converged) containing the
    polar decomposition of `x = u * h`, the number of iterations to compute `u`,
    and `is_converged`, whose value is `True` when the convergence is achieved
    within the maximum number of iterations.
  zbThe `is_hermitian` argument must be statically specified to use `qdwh` within JAX transformations.N
   zdThe `max_iterations` argument must be statically specified to use `qdwh` within JAX transformations.z1The input matrix of shape M x N must have M >= N.float32)
r   concrete_or_errorboolintr   
ValueErrorr   r   default_matmul_precisionr   )r   r   r}   r   r   rB   rC   r0   r=   r<   r   r   r   s                r   qdwhr      s    > ''
L <=, N++^ >?N 
$!QU
H
IIDAqa!QAaqA&&y1 H$)!Q>3$G!Aq)\H 
Ay,	&&H Hs   
B--B6)r   )r   r   r   r   r   )r   r   r}   z
int | Noner   zfloat | Noner   ztuple[int, int] | None)__doc__
__future__r   rt   r	   r   jax._srcr   r   r   r   r   r   jax._src.laxr
   r6   jax._src.numpyrg   r   r)   r1   rI   rU   r   ru   jitr   r[   r   r   <module>r      s   
 #        ! - /@:
A1*8p'h GGF !%,03' 3' 	3'
 
3' *3'3'r   