
    ukiMK                    B   d Z ddlmZ ddlmZmZ ddlmZmZ ddl	Z	ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z# ddlm
Z$ ddlm%Z% ddl&m'Z'm(Z(m)Z)m*Z* ddlm+Z, ddl+Z-e(e.cZ.Z/e)e0cZ0Z1ejd                  Z2ejf                  Z3ejh                  Z4d Z5d Z6	 	 d,dZ7d Z8d Z9d,dZ:	 	 	 	 d-dZ;	 	 	 	 	 	 	 	 d.d Z<d!d"	 	 	 	 	 d/d#Z=i Z>	 	 	 	 	 	 d0d$Z?d% Z@ e@ej                  d      e>ej                  <    e@ej                  d&d'      e>ej                  <    e@ej                  d(      e>ej                  <   	 	 d1d)ZDeDe>ej                  <   d2d*ZF	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d3d+ZGy)4ay  HLO interpreter for Pallas kernels.

The interpret mode for Pallas emulates the behavior of a Pallas kernel
by producing an equivalent HLO program. This involves several steps that
are carried out in stages:

1) Resolve Pallas-specific dtypes (e.g. Semaphores) to a suitable
 HLO type (e.g. int).
2) Discharge stateful operations.
3) Evaluate the body of the kernel in a loop.
    )annotations)IterableSequence)reducepartialN)Any)Callable)lax)slicing)conditionals)loops)core)frozen_dict)linear_util)source_info_util)partial_eval)
primitives)state)	discharge)typing)util)foreachsafe_mapsafe_zip
split_list)numpyc                    t        | d      r;t        | j                  d      r%| j                  j                  |       j                  S | S )a}  Converts logical dtypes into JAX dtypes for interpret mode.

  Logical types are dtypes that exist as part of the Pallas API but
  do not have an corresponding backing type in HLO (for example,
  a Semaphore dtype).

  This function maps a logical dtype to a valid HLO dtype that can be
  used to emulate the behavior of the logical dtype (such as mapping a
  Semaphore to int).
  _rulespallas_interpret_element_aval)hasattrr   r   dtyper!   s    Z/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/_src/pallas/hlo_interpreter.py _logical_to_interpret_mode_dtyper$   B   s=     eXell;<<<55e<BBB	,    c                >   t        | t        j                        r't        | j                        }| j                  |      S t        | t        j                        rAt        | j                        }t        j                  | j                  || j                        S | S )N)
inner_aval)	weak_type)
isinstancer   AbstractRef$_logical_aval_to_interpret_mode_avalr'   updatejax_coreShapedArrayr$   r!   shaper(   )avalr'   inner_dtypes      r#   r+   r+   S   sq    e''(5dooFJ;;*;--h**+24::>K

K4>>RR	+r%   c                   t        d | D              } t        j                  || |      }t        t        j                  t        |            t        j                  |t        j                                 }t        j                  ||      S )Nc              3  f   K   | ])  }t        j                  |t         j                          + ywr"   Njnpasarrayint32.0ss     r#   	<genexpr>z!_dynamic_slice.<locals>.<genexpr>`   "     GCKK33G   /1)slice_sizesr"   )
tupler   dynamic_slicenparangelenarraybool_r
   squeeze)	start_idxblock_shapevalue
is_squeezeoutputsqueeze_dimss         r#   _dynamic_slicerN   ]   sn     GYGG)  	{K&ryyZ1"((:AC3K L M,	V\	**r%   c                    t        d | D              } t        d t        |      D              }t        j                  |||      }|j                  |k(  sJ t        j                  |||       S )Nc              3  f   K   | ])  }t        j                  |t         j                          + ywr4   r5   r9   s     r#   r<   z(_dynamic_update_slice.<locals>.<genexpr>h   r=   r>   c              3  *   K   | ]  \  }}|s|  y wN )r:   ibs      r#   r<   z(_dynamic_update_slice.<locals>.<genexpr>i   s      #tq! !  #s   )r@   	enumerater
   broadcast_in_dimr/   r   dynamic_update_slice)rH   rI   rJ   r,   rK   broadcast_dimss         r#   _dynamic_update_slicerZ   g   sh    GYGG) #y'< # #.^D&		$$	$		%	%eVY	??r%   c                   g }d}t        t        t        | |                  D ]J  \  }}t        j                  ||dz   |      }||k(  }|j                  t        j                  |d|             L t        t        |            S )NT   r   )reversedlistzipr6   whereappendr@   )gridindicesnext_indicescarrydim_sizeindexrT   s          r#   _get_next_indicesrh   q   s|    ,
%!$s4'9":; 0oh		%E*AME		%A./0 
x%	&&r%   c                .   t        d t        | j                  |      D              }|| j                  k7  r_t        d t        || j                        D              }t        j                  d| j
                        }t        j                  | ||      } | S )aL  Pads values so the shape evenly divides into block dimensions.

  For example, if values has a shape of (33, 2, 5) with a block_shape of
  (32, 2, 4), this function will pad the value of shape to (64, 2, 8).

  Args:
    value: Array to be padded.
    block_shape: Block shapes to use for padding.

  Returns:
    A padded array.
  c              3  >   K   | ]  \  }}|d z
  |z  d z   |z    yw)r\   NrS   )r:   vrU   s      r#   r<   z*_pad_to_block_dimension.<locals>.<genexpr>   s+      !%AA!|a1s   c              3  0   K   | ]  \  }}d ||z
  f  yw)r   NrS   )r:   arU   s      r#   r<   z*_pad_to_block_dimension.<locals>.<genexpr>   s     J41aq!A#hJs   rS   r/   r!   )constant_values)r@   r_   r/   r   uninitialized_valuer!   r6   pad)rJ   rI   padded_shape	pad_width	pad_values        r#   _pad_to_block_dimensionru   {   s|      ),U[[+)F , U[[ J3|U[[+IJJI..Ru{{KIGGE9i@E	,r%   c                8   |D ci c]  \  }}||
 }}}g }t        |       D ]n  \  }}||v r|j                  |||             "|j                  t        j                  |j                  j
                  |j                  j                               p |S c c}}w rR   )rV   ra   r   rp   
array_avalr/   r!   )	block_mappings_output
input_argsinput_output_aliaseskrk   oi_mapoutput_valsrT   bms	            r#   _initialize_output_valsr      s     22TQAqD2&2+./  eaF{F1I./77
--


--

  	  
 3s   Bjaxprc                p   ~|j                         5  t        | |      \  }}t        |j                        t        | j                        k(  sJ |j                  |j                     }|D cg c]  }|j
                   }}t        j                  ||      \  }	}
ddd       	
fS c c}w # 1 sw Y   xY w)z4Converts a Pallas kernel jaxpr to a valid HLO jaxpr.N)	trace_envresolve_physical_typesrD   invarsslice_scratch_opsr0   state_dischargedischarge_state)r   constsgrid_mappingbackend
phys_jaxprphys_constsscratch_invarsrk   scratch_avalsdischarged_jaxprdischarged_constss              r#   kernel_to_hlo_jaxprr      s      	!4UFCJ z  !S%6666&&|'E'EFN%34QVV4M4*9*I*IK+!''	! 
,m	;; 5	! 	!s   AB,,B'?B,'B,,B5T)propagate_source_infoc                  dfd}dfd}i t        || j                  |       t        || j                  |       t        j                  |       }| j
                  D ]|  }t        ||j                        }	t        j                         }
|
|j                  j                  z  }
|r|j                  j                  nd}t        j                  ||
      5  |j                  j                  5  |j                  t         v r't!        |j                     |g|	i |j"                  }nI|j                  j%                  |j"                        \  }} |j                  j&                  g ||	i |}ddd       ddd       |j                  j(                  rt        ||j*                         n ||j*                  d          t        j,                  ||        t        || j*                        S # 1 sw Y   xY w# 1 sw Y   xY w)a5  Evaluates a Jaxpr with recursion into higher-order primitives.

  ``recurse_hop_rule`` is a Jaxpr interpreter (translates a Jaxpr to a new
  Jaxpr) that will be called on sub-jaxprs of higher-order primitives, such
  as the body of a loop or branches of a conditional.

  Args:
    jaxpr: The Jaxpr to evaluate.
    consts: Consts that ``jaxpr`` closes over.
    *args: Input arguments to the ``jaxpr``.
    recurse_hop_rule: A Jaxpr interpreter to call on sub-jaxprs of
      higher-order primitives.
    propagate_source_info: Whether to propagate source info.
  c                Z    t        | t        j                        r| j                  S |    S rR   )r)   r-   Literalval)rk   envs    r#   readz"eval_jaxpr_recursive.<locals>.read   s%    q("2"23155?Q?r%   c                    || <   y rR   rS   )rk   r   r   s     r#   writez#eval_jaxpr_recursive.<locals>.write   s    CFr%   N)
name_stackr   )rk   zjax_core.Atomreturnr   )rk   zjax_core.Varr   r   r   None)r   	constvarsr   r-   	last_usedeqnsmapr   current_name_stacksource_infor   	tracebackuser_contextctxmanager	primitive_eval_jaxpr_hop_rulesparamsget_bind_paramsbindmultiple_resultsoutvarsclean_up_dead_vars)r   r   recurse_hop_ruler   argsr   r   lueqnin_valsr   r   anssubfunsbind_paramsr   s                  @r#   eval_jaxpr_recursiver      s   *@ "$#	%&)	%t$% "ZZ .c$

#G!446J#//,,,J-B))I		&	&j
* D+.77??D	/	/#CMM26&6*-**6  #}}<<SZZH cmm  C'CGC{CD D }}%%eS[[#&CKKNC S"-#.$ 
T5==	!!D D D Ds%   H.BG51H5G>:HH
	c           	        |D cg c]%  }t        t        t        j                  |            ' }}t	        j
                  |d|       }t	        j
                  ||dz   d       }g || j                  |}| j                  |      } t        j                  | j                  | j                  | j                  | j                        }| j                  |      } t        j                  t        j                  |       d      S c c}w )zPads a Jaxpr with constvars from all branches.

  For primitives that have multiple Jaxprs (e.g. cond_p), we need
  to pad each Jaxpr with all consts from all branches so the
  signatures match, but only use the consts for this branch.
  Nr\   )r   )effectsrS   )r@   r   r-   Varr   concatenater   replacepemake_jaxpr_effectsr   r   r   ClosedJaxprconvert_constvars_jaxpr)	r   rT   all_const_avalsconst_avalsunused_const_varsconst_prefixconst_suffixr   r   s	            r#   pad_jaxpr_constvarsr      s     +:;& S{;< ; ;!!"3BQ"78,!!"3AEF";<,>>u>>)
--)-
,%!!%//5<<"'--='
---
(%			b88?	DD;s   *Dc                (     	 d	 dd fd}|S )aO  Makes a rule for higher-order ops by recursively applying the jaxpr pass.

  Args:
    primitive: A JAX primitive.
    keys: The names of parameters which correspond to Jaxprs that need
      to be recursed over.

  Returns:
    A primitive rule for the edtype Jaxpr pass. This should be registered
    using `register_edtype_rule`.
  c                
   d}t        |t        j                        r|t        |j                        dkD  rt        d|        | |d      \  }}|rB|t        |||      }t        |      }||fS t        j                  |      }t        |      }||fS |}||fS t        |t        j                        r: | |j                  |j                        \  }}t        j                  ||      }||fS t        dt        |       d      )NrS   r   z+Cannot physicalize a jaxpr with constvars: zParameter of type z is not a Jaxpr.)r)   r-   JaxprrD   r   
ValueErrorr   r@   r   r   r   r   r   type)	interpreterrJ   
mapped_idx
extra_argsphysical_jaxprphysical_consts	new_jaxprr   
new_constss	            r#   _resolve_jaxprz%make_hop_rule.<locals>._resolve_jaxpr  s#    J%(	U__		!FugNOO(3E2(>%no	!).*4*9;) _-* j   00@)_-* j   #	 j   
E8//	0%ekk5<<@eZ&&uj9i j   +DK=8HIJJr%   c                    i }D ]  }||   }t        |t        j                        st        |t        j                        r  |      \  }}|||<   ||z   }St        |t              st        |t
              rRt        t         fd|t        t        |                   \  }}	t	        d |	D              }
t	        |      ||<   |
|z   }t        d| d|        |j                  |        j                  |i |S )Nc                     | |      S )N)r   rS   )xrT   r   r   s     r#   <lambda>z-make_hop_rule.<locals>.rule.<locals>.<lambda>2  s    ~k1C r%   c              3  .   K   | ]  }|D ]  }|   y wrR   rS   )r:   _argsnew_args      r#   r<   z.make_hop_rule.<locals>.rule.<locals>.<genexpr>3  s     SUS'WSWSs   z
Parameter z' is not a Jaxpr or sequence of Jaxprs: )r)   r-   r   r   r@   r^   r_   r   rangerD   r   r,   r   )r   r   r   
new_paramskeyrJ   r   r   mapped_jaxprsmapped_argsall_new_argsr   keysr   s   `          r#   rulezmake_hop_rule.<locals>.rule'  s   J [Ske	E8>>	*j
%%/' .{E B	:#
3D eU#z%'>%(#
CUERUV[R\L]+_ &`"{S+SS.
3d":cU*QRWQXYZZ[ MM*9>>4*6**r%   rR   )rJ   z%jax_core.Jaxpr | jax_core.ClosedJaxprrS   )r   r   r   r   s   `` @r#   make_hop_ruler     s!     !%!A!4+& 
+r%   
body_jaxpr
cond_jaxprbranchesc               t    |rt        d       | ||      \  }}t        j                  j                  |||dS )Nz:run_scoped interpret rule does not support collective axes)r   collective_axes)NotImplementedErrorr   run_scoped_pr   )r   r   r   r   r   r   s         r#   _run_scoped_physicalize_ruler   @  sH    
D  %0v$>!./		 	 	%	%no
 r%   c                    t        j                  | |      j                  }t        t	        t
        |            }t        t        | |t              }t        j                  || j                        }t        j                  ||      \  }}}||fS )N)r   )
debug_info)r-   r   in_avalsr@   r   r+   r   r   r   r   	wrap_initr   r   trace_to_jaxpr_dynamic)r   r   kernel_avals
interp_funwrappedr   _r   s           r#   r   r   N  s    %%eV4==,s?)+ ,,E6-/* LL0@0@A'66|)Q
	J	r%   c           
     2	  %&'()*+,-. ~~~~~	~
|j                   }t        |j                  g      \  }}t        |      (t	        (fdj
                  D              )t        (d       J t        |d|       \  '&}|r$t        d|j                   d       t        '       t        j                  ||      }|j                     -|t        -      d  }t	        d |D              .g }t        t        j                   ||      j"                        D ]  \  }}|j$                  D cg c]*  }t'        |t(        j*                        r|j,                  nd, }}|jt/        d |D              rX|rt1        d	      t3        j4                  d|j6                  
      }t9        j:                  |||D cg c]	  }g |d c}      }|j=                  |        j"                  D cg c]!  }t)        j>                  |j$                        # c}%j"                  D cg c]  }t	        d |j$                  D                c}*|D ]  }|j@                  } tC        tD        |%      }|jG                  .       t        |      t        |      z   +tI        jJ                  d      ft        )      z  })rtM        tH        jN                  )      ,nd,,fd}%&')*+-.f	d}tQ        jR                  ||tI        jJ                  d      |g|      ^}}}|t        |      t        |      t        |      z    }g }t        |j                        D ]  \  } }|j$                  D cg c]*  }t'        |t(        j*                        r|j,                  nd, }}|lt/        d |D              rZ|rt1        d	      t        | \  }!}"t        | jT                  |"      D #cg c]
  \  }#}|#|z
   }$}#}tW        jX                  | |!|$      } | jT                  |jZ                  jT                  k7  r8tW        jX                  | d| j\                  z  |jZ                  jT                        } |j=                  |        " |S c c}w c c}w c c}w c c}w c c}w c c}}#w )Nc              3  \   K   | ]#  }|t         j                  ur|n
t               % y wrR   )pallas_coredynamic_grid_dimnext)r:   rm   dynamic_grid_args_iters     r#   r<   z,pallas_call_hlo_interpret.<locals>.<genexpr>q  s6        K000a&'(s   ),rS   )r   z(
Jaxpr of the the kernel in pallas_call :c              3  p   K   | ].  }t        j                  |j                  |j                         0 y wrR   )r   rp   r/   r!   )r:   rm   s     r#   r<   z,pallas_call_hlo_interpret.<locals>.<genexpr>  s*      ;<j$$QWWagg6s   46r   r   c              3  &   K   | ]	  }|d k7    ywr   NrS   r:   ps     r#   r<   z,pallas_call_hlo_interpret.<locals>.<genexpr>       "@11;"@   z$Padding with aliasing not supported.rn   r   c              3  P   K   | ]  }t        |t        j                           y wrR   )r)   r   Squeezed)r:   bds     r#   r<   z,pallas_call_hlo_interpret.<locals>.<genexpr>  s     JRJr;//0Js   $&r\   c                    | ^}}|k  S rR   rS   )re   rT   r   num_iterationss      r#   condz'pallas_call_hlo_interpret.<locals>.cond  s    EA~r%   c           	     Z  	 | ^}}}j                   j                  |      }n't        fdt        t        |            D              }t	        |g      \  }}t        j                  |      5  D ]a  }t        |j                  t        j                        s(t        j                  |      }|j                  t        j                        |_        c j                   D 	cg c]  }	 |	j"                  |g  }
}	d d d        t%        t&        
|      }t        j                  |      5  t)        j*                        t)              t)        |      z   t)              z   k(  s9J t)        j*                        t)              t)        |      t)              f       t        j,                  g|| }d d d        t	        |j.                  g      \  }}}t%        t0        |
||      }|dz   t3        |      g||S c c}	w # 1 sw Y   xY w# 1 sw Y   axY w)Nc              3  r   K   | ].  \  }\  }}|j                   vrt        j                  ||       0 y wrR   )vmapped_dimsr   GridAxis)r:   dimidxrU   r   s       r#   r<   z:pallas_call_hlo_interpret.<locals>.body.<locals>.<genexpr>  s<      c8C111 

sA
&s   47r"   r\   )local_grid_envr@   rV   r_   r   r   grid_envr)   r!   r-   bintget_avalr,   r6   r8   r0   block_mappingscompute_start_indices_interpretr   rN   rD   r   
eval_jaxprnum_index_operandsrZ   rh   )re   rT   loop_idxcarry_blocksr  carry_consts_insscratchr;   r0   r~   start_indicesblocksr   	out_inoutout_scratch	out_carryblock_shapesr   r   rb   r   is_squeeze_dimnum_inout_blocksscalarsscratch_valuess                   r#   bodyz'pallas_call_hlo_interpret.<locals>.body  sO   !&Ax,"".#228TBn (Xt)<= n !+<:J9K Lg			n	-  0!aggx}}-""1%$;;SYY;/!&0 !// -"
,
,X
@
@m  !>3F			n	- !(()S\CK-G#
K .   %%
&
g,
f+
n
	
  ""
-07:@CJf !+002BC!EAy+)=,$iAIE$T84 &&$& &1  s2   2*HA
H'HH3BH!HH!H*c              3  &   K   | ]	  }|d k7    ywr   rS   r  s     r#   r<   z,pallas_call_hlo_interpret.<locals>.<genexpr>  r  r  )r   )/r   r   num_dynamic_grid_boundsiterr@   rb   r   r   printfunc_src_infor   rx   slice_index_opsrD   r_   	itertoolschainr  rI   r)   r   Elementpaddinganyr   r   rp   r!   r
   rq   ra   _get_block_shaper0   r   ru   extendr6   r8   r   multiplyr   
while_loopr/   r   slicerw   ndim)/r   r   debugrz   r   meshcompiler_paramscost_estimate	out_avalsmetadatanamer   r   dynamic_grid_argsr   out
block_argsre   r   r~   r  r1  rt   r  carry_elementr0   grid_start_indicesr
  r'  r   out_out	out_nopadopad_lowpad_highr;   limit_indicesr"  r   r   r   rb   r#  r$  r	  r%  r&  s/       `                                @@@@@@@@@@r#   pallas_call_hlo_interpretrK  [  s]    O]Ix* '
\112T   12	     
$
 
$d	+	33	37JRw804%}
	5j6N6N5Oq
QR	
 B B $&:	<# --.'CLM"*  @M . %9??:s3\5P5PQ ea) (K,?,?@rzzfL )G )s"@"@@	!"HII00rIi
''!Y' :Q1a :
;a	LLO )779 ..r~~> 9, ++
 J2>>JJ.  mD %ul
;%,,~_s3x/		!T2	CLL$/N N
+& +&Z ##
D399Q<!3<e<.1a% #j/#j/CH"<=')7L>>? ea) (K,?,?@rzzfL )G )s"@"@@	!"HIIw-gx),QWWh)?@Aq1u@m@
--7M
2aww"--%%%
--4!&&="--*=*=
>aQ 
M) !;9`) As$   ./Q:6Q?,&R##R	*/R R)rI   ztuple[int, ...])rx   zIterable[BlockMapping]r   zSequence[jax_typing.Array])
r   jax_core.Jaxprr   Sequence[Any]r   GridMappingr   
str | Noner   z3tuple[jax_core.Jaxpr, Sequence[Any], Sequence[Any]])r   rL  r   zOCallable[[jax_core.Jaxpr, Sequence[Any]], tuple[jax_core.Jaxpr, Sequence[Any]]]r   z	list[Any])r   rL  rT   intr   rM  r   zjax_core.ClosedJaxpr)r   rL  )r   rL  r   rM  )r   rO  r   rL  r9  boolrz   ztuple[tuple[int, int], ...]r   rN  r:  zpallas_core.Mesh | Noner;  r   r<  CostEstimater=  z"tuple[jax_core.AbstractValue, ...]r>  z'frozen_dict.FrozenDict[str, str] | Noner?  rO  )H__doc__
__future__r   collections.abcr   r   	functoolsr   r   r.  r   r   r	   jax._src.laxr
   r   jax._src.lax.control_flowr   r   jax._srcr   r-   r   r   r   r   jax._src.interpretersr   r   jax._src.pallasr   r   r   jax._src.stater   r   
jax_typingr   jax._src.utilr   r   r   r   r   r6   rB   r   
unsafe_mapr_   
unsafe_zipBlockMappingrN  rR  r$   r+   rN   rZ   rh   ru   r   r   r   r   r   r   scan_pwhile_pcond_pr   r   r   rK  rS   r%   r#   <module>re     sO  
 # . %   $    2 + %   & % 4 / &  7 )   " CZCZ''%%''"+++@'.1)C< -<&1< ",< 
=	<4 1"1"E	1" $-1"h  EE)6E 2E,9v '4ELL'&J ell #'4	MM<(/ emm $-:<;N;NPZ-[ l)) *	!/	 2N j-- .
TT T 	T
 6T T "T T  T 2T 6T Tr%   