
    uki                    t   U d Z ddlmZ ddlmZ ddlmZ ddlZddlm	Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ej8                  ej:                  z  Zded<   ddZ 	 	 	 	 	 	 	 	 	 	 	 	 ddZ! e
jD                  d      Z#de#_$        e#jJ                  	 	 	 	 d d       Z& ejN                  e#      	 	 d!d       Z(d"dZ) e
jD                  d      Z*de*_$        e*jJ                  d#d       Z+ ejN                  e*      d!d       Z,dddddd	 	 	 	 	 	 	 	 	 	 	 	 	 d$dZ-ddd	 	 	 	 	 	 	 	 	 d%dZ.y)&z'Module for GPU-specific JAX primitives.    )annotations)Sequence)	TypeAliasN)core)state)gpu)dialect)
primitives)lowering)mlirr   Refc           
        | j                   t        j                  k(  rd}d}n]| j                   t        j                  k(  rd}d}n;| j                   t        j                  k(  rd}d}nt        d| j                    d      t        || gd| d	| d
t        j                  | j                  | j                         g      \  }|S )zElementwise approximate hyperbolic tangent: :math:`\mathrm{tanh}(x)`.

  See
  https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-tanh.
  ztanh.approx.f16 $0, $1;hztanh.approx.bf16 $0, $1;ztanh.approx.f32 $0, $1;fzapprox_tanh does not accept z arrays=,   )argsconstraintspackresult_shape_dtypes)
dtypejnpfloat16bfloat16float32	TypeErrorelementwise_inline_asmjaxShapeDtypeStructshape)xasm
constraintresults       \/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/_src/pallas/triton/primitives.pyapprox_tanhr'   $   s     WW
#CJww#,,
$CJww#++
#CJ
2177)7C
DD#	3j\:,///AB(6 
-    c          	     D    t        j                  || ||t        |      dS )a!  Inline assembly applying an elementwise operation.

  Args:
    asm: The assembly code to run.
    args: The arguments to pass to the assembly code.
    constraints: LLVM inline assembly `constraints
      <https://llvm.org/docs/LangRef.html#inline-asm-constraint-string>`_.
    pack: The number of elements from each argument expected by a single
      instance of the assembly code.
    result_shape_dtypes: The shapes and dtypes of the results produced by the
      assembly code.

  Returns:
    The results produced by the assembly code.
  )r#   r   r   r   )elementwise_inline_asm_pbindtuple)r#   r   r   r   r   s        r&   r   r   @   s,    . 
"	&	&
 34
 r(   r*   Tc           	         ~t        d t        ||dd        D              st        d      | D cg c],  }t        j                  |j
                  |j                        . c}S c c}w )Nc              3  T   K   | ]   \  }}|j                   |j                   k(   " y wN)r!   ).0r"   ys      r&   	<genexpr>z8_elementwise_inline_asm_abstract_eval.<locals>.<genexpr>i   s!     BDAqQWWBs   &(r   z@All arguments of elementwise_inline_asm must have the same shape)allzip
ValueErrorjax_coreShapedArrayr!   r   )r   avalskwargsss       r&   %_elementwise_inline_asm_abstract_evalr;   d   s^     	BCuQRy,AB	B
J  ;N	NQ(

qww
0	NN	Ns   1A$c                   ~t        j                  g t        t        j                  | j
                        ||d||      j                  S )NT)r   purepacked_elementr   )
tt_dialectElementwiseInlineAsmOpmapr   aval_to_ir_type	avals_outr%   )ctxr#   r   r   r   r   s         r&    _elementwise_inline_asm_loweringrE   p   sI     		*	*1D  #--01	
 Fr(   c                 *    t         j                         S )z/Synchronizes all kernel executions in the grid.)debug_barrier_pr+    r(   r&   debug_barrierrI      s    				r(   rG   c                      y)NrH   rH   rH   r(   r&   _debug_barrier_abstract_evalrK      s    	r(   c                0    ~ t        j                          g S r/   )gpu_dialectbarrier)rD   s    r&   _debug_barrier_loweringrO      s    		)r(   Fmaskothercache_modifiereviction_policyvolatilec          	     :    t        j                  | d|||||      S )a:  Loads an array from the given ref.

  If neither ``mask`` nor ``other`` is specified, this function has the same
  semantics as ``ref[idx]`` in JAX.

  Args:
    ref: The ref to load from.
    mask: An optional boolean mask specifying which indices to load. If mask is
      ``False`` and ``other`` is not given, no assumptions can be made about the
      value in the resulting array.
    other: An optional value to use for indices where mask is ``False``.
    cache_modifier: TO BE DOCUMENTED.
    eviction_policy: TO BE DOCUMENTED.
    volatile: TO BE DOCUMENTED.
  NrP   )pallas_primitivesload)refrQ   rR   rS   rT   rU   s         r&   rX   rX      s,    0 
			
#%
 r(   rQ   rT   c               6    t        j                  | d|||      S )zrStores a value to the given ref.

  See :func:`~jax.experimental.pallas.load` for the meaning of the arguments.
  NrZ   )rW   store)rY   valrQ   rT   s       r&   r\   r\      s&     
	 	 	
	%
 r(   )r"   	jax.Arrayreturnr^   )r#   strr   Sequence[jax.Array]r   r`   r   intr   zSequence[jax.ShapeDtypeStruct]r_   ra   )r8   zjax_core.ShapedArrayr_   Sequence[jax_core.ShapedArray])rD   zlowering.LoweringRuleContext)r_   None)r_   rc   )rY   r   rQ   jax.Array | NonerR   zjax.typing.ArrayLike | NonerS   
str | NonerT   rf   rU   boolr_   r^   )
rY   r   r]   r^   rQ   re   rT   rf   r_   rd   )/__doc__
__future__r   collections.abcr   typingr   r   jax._srcr   r6   r   jax._src.lib.mlir.dialectsr   rM   jax._src.lib.tritonr	   r?   jax._src.pallasr
   rW   jax._src.pallas.tritonr   jax.interpretersr   	jax.numpynumpyr   AbstractRefTransformedRefr   __annotations__r'   r   	Primitiver*   multiple_resultsdef_abstract_evalr;   register_loweringrE   rI   rG   rK   rO   rX   r\   rH   r(   r&   <module>r{      s   . " $  
 %  9 5 ; + !  ""U%9%99Y 98	  	
  8 @ .8--.HI ,0  ) ++O O#O ,O 45	% 6& 
 %($$%67#'  "" # O, - ")-!%"& 	    '	 
         N ""&		 	
   
r(   