
    uki'o                     b    d dl mZ d dlmZ d dlZd dlmZ d dlZ	d dl
mZ d dl
mZ d dl
mZ d dl
mZmZ d d	l
mZ d d
l
mZ d dl
mZ d dl
mZ d dl
mZ d dl
mZ d dl
mZ d dl
mZ d dlmZmZmZmZmZmZmZ m!Z!m"Z"m#Z# d dl$m%Z% dZ& ejN                  ejP                  jS                         ejP                  jS                         ejP                  jS                         ejP                  jS                         ejP                  jS                         ejP                  jS                         ejP                  jS                         ejP                  jS                         ejP                  jS                         ejP                  jS                         ejP                  jS                         ejT                  gejP                  jS                         ejP                  jS                         ejP                  jS                         e!jP                  jS                         e"jP                  jS                         e#jP                  jS                               D ](  Z+ e,e+ejZ                        s e%j\                  e+       * de%j^                  de%j`                  fdZ1  e%jd                  ejf                        e1         e%jd                  ejh                        e1         e%jd                  ejj                        e1         e%jd                  ejl                        e1         e%jd                  ejn                        e1         e%jd                  ejp                        e1         e%jd                  ejr                        e1         e%jd                  ejt                        e1         e%jd                  ejv                        e1         e%jd                  ejx                        e1         e%jd                  ejz                        e1         e%jd                  ej|                        e1         e%jd                  ej~                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  ej                        e1         e%jd                  e"j                        e1         e%jd                  e"j                        e1         e%jd                  e"j                        e1         e%jd                  e"j                        e1         e%jd                  e"j                        e1         e%jd                  e"j                        e1         e%jd                  e"j                        e1        e%j\                  ej                         de%j^                  de%j`                  fdZX  e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX         e%jd                  ej                        eX       de%j^                  deide%j`                  fdZj  e%jd                  ej                        ej         e%jd                  ej                        ej         e%jd                  ej                        ej         e%jd                  ej                        ej        e%jd                  ej                        de%j^                  deide%j`                  fd       Zp e%jd                  ej                        de%j^                  dej                  de%j`                  fd       Zsdeideideid eid!eid"eid#eteieif   deifd$Zud%ej                  d&e%j                  d'e%j                  d(e%j                  d)eei   d#eeteieif      d*eei   d+eei   deifd,Zxd&e%j                  d'e%j                  d(e%j                  d)eei   d#eeteieif      d*eei   d+eei   dej                  d-eideifd.Zz e%jd                  ej                        de%j^                  d)eei   d#eeteieif      d*eei   d+eei   dej                  d-eide%j`                  fd/       Z|de%j^                  d0ete}d1f   de%j`                  dz  fd2Z~d3d4de%j^                  d0ete}d1f   d5ede%j`                  fd6Z  e%jd                  e j                        d7          e%jd                  e j                        d8        d9e!j                  d:eid;eideifd<Z e%jd                  e!j
                        de%j^                  d9e!j                  de%j`                  fd=       Zde%j^                  de%j`                  fd>Z  e%jd                  e!j                        e         e%jd                  e!j                        e         e%jd                  e!j                        e         e%jd                  e!j                        e         e%jd                  e!j                        e         e%jd                  e!j                        e       de%j^                  d0ete}d1f   de%j`                  fd?Z  e%jd                  e j                        e         e%jd                  e j                         e        e%jd                  e j"                        de%j^                  d0ete}d1f   de%j`                  fd@       Z e%jd                  e j&                        de%j^                  dAete}d1f   de%j`                  fdB       Z e%jd                  e j*                        de%j^                  dAete}d1f   dCeteteieif   d1f   de%j`                  fdD       Z e%jd                  ej.                        de%j^                  d0eteid1f   de%j`                  fdE       Z e%jd                  ej2                        de%j^                  de%j`                  fdF       Z e%jd                  ej6                         e%jd                  ej8                        de%j^                  de%j`                  fdG              Z e%jd                  ej<                        de%j^                  de%j`                  fdH       Zy)I    )defaultdict)replaceN)Sequence)api)ad_checkpoint)ad_util)coreutil)dispatch)ops)pjit)prng)random)	shard_map)callback)	debugging)
anncontrol_flowconvolutionfftlaxlinalgparallelslicingspecialwindowed_reductions)roofline   ctxreturnc                 P   d | j                   D        \  }t        j                  j                  | j                  d         }t        j
                  |j                  |j                  j                  |j                  z  |j                  j                  |j                  z  z         S )Nc              3   Z   K   | ]#  }t         j                  j                  |       % y wNr   RooflineShape	from_aval.0avals     ^/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/experimental/roofline/rooflines.py	<genexpr>z$_unary_p_roofline.<locals>.<genexpr>N   !     	JT(
 
 
*
*4
0	J   )+r   unfused_flopsunfused_hbm_bytes	avals_inr   r%   r&   	avals_outRooflineResultsizedtypeitemsize)r   argskwxouts        r*   _unary_p_roofliner<   I   s|    
 
KS\\	J$1((q)9:#		 	 FF
''

QVV
#cii&8&8388&C
C
     c                 Z   d | j                   D        \  }}t        j                  |j                  |j                  d      D cg c]  \  }}t	        ||       }}}t
        j                  j                  | j                  d         }t        j                  t        t        j                  |            |j                  j                  |j                  z  |j                  j                  |j                  z  z   |j                  j                  |j                  z  z         S c c}}w )Nc              3   Z   K   | ]#  }t         j                  j                  |       % y wr#   r$   r'   s     r*   r+   z%_binary_p_roofline.<locals>.<genexpr>   !     Nh$$..t4Nr-      )	fillvaluer   r.   )r2   itzip_longestshapemaxr   r%   r&   r3   r4   intnpprodr6   r7   r5   )	r   r8   r9   lhsrhslrbroadcasted_shaper;   s	            r*   _binary_p_rooflinerO   ~   s    
 ON(#sNN399cii1MAqc!Qi  	((q)9:#		 	  123
))

sxx
'II)*II)*
 	s   D'axisc                V   d | j                   D        \  }t        j                  j                  | j                  d         }t        j
                  |j                  |   |j                  j                  |j                  z  |j                  j                  |j                  z  z         S )Nc              3   Z   K   | ]#  }t         j                  j                  |       % y wr#   r$   r'   s     r*   r+   z)_cumulative_p_roofline.<locals>.<genexpr>   r,   r-   r   r.   
r2   r   r%   r&   r3   r4   rE   r6   r7   r5   r   rP   r8   r9   r:   r;   s         r*   _cumulative_p_rooflinerU      s     
KS\\	J$1((q)9:#		 	 GGDM
''

QVV
#cii&8&8388&C
C	
 r=   c                \   d | j                   D        \  }t        j                  j                  | j                  d         }t        j
                  |j                  |   dz  |j                  j                  |j                  z  |j                  j                  |j                  z  z         S )Nc              3   Z   K   | ]#  }t         j                  j                  |       % y wr#   r$   r'   s     r*   r+   z+_cumlogsumexp_p_roofline.<locals>.<genexpr>   r,   r-   r   r   r.   rS   rT   s         r*   _cumlogsumexp_p_rooflinerX      s     
KS\\	J$1((q)9:#		 	  GGDMA%
''

QVV
#cii&8&8388&C
C
 r=   dimension_numbersc                   d | j                   D        \  }}t        j                  j                  | j                  d         }|\  \  }}\  }	}t
        |j                  z  |j                  z  t        j                  |D 
cg c]  }
|j                  |
    c}
      z  t        j                  |	D 
cg c]  }
|j                  |
    c}
      z  }d}| j                  s||j                  z  }||j                  z  }| j                  s||j                  z  }t        j                  t        |      t        |      ||      S c c}
w c c}
w )Nc              3   Z   K   | ]#  }t         j                  j                  |       % y wr#   r$   r'   s     r*   r+   z(_dot_general_roofline.<locals>.<genexpr>   r@   r-   r   )flopsr/   	hbm_bytesr0   )r2   r   r%   r&   r3   _FMA_FLOPS_FACTORr5   rH   rI   rE   pin_lhs_in_vmembytespin_rhs_in_vmemr4   rG   )r   rY   r8   r9   rJ   rK   r;   lhs_contract_	lhs_batchir\   r]   s                r*   _dot_general_rooflinerf      s)    ON(#s((q)9:#&7#<^i 	hh	hh 	gg\2syy|234 	ggY/syy|/0	1  )			II			I		 	 JJ!	
  3/s    E
1E
window_dim_stridebase_dilationwindow_dilationkernel_limitinput_limitoutput_limitpaddingc                    |d   }|d   }||k(  r-||k(  r(||k(  r#|dk(  rt        d|dz
        | k(  r|dk(  r|dk(  r|S |dk(  r&||k(  r!|dk(  r|dk(  r| dk(  r||dz
  k(  r
||dz
  k(  r|S d}	t        |      D ]  }
| dk(  r;|dk(  r6||
|z  z
  }t        ||z   |      }t        d|      }|	t        ||z
  d      z  }	Ct        |      D ]7  }|| z  |z
  |
|z  z   }t        ||z        }|||z  k7  r(|dk  s||k\  r3|	dz  }	9  |	S )a  Gets the valid position count for conv for a single spatial dimension.

  Args:
    window_dim_stride: The stride of the window along this dimension.
    base_dilation: The base dilation factor along this dimension.
    window_dilation: The window dilation factor along this dimension.
    kernel_limit: The size of the kernel along this dimension.
    input_limit: The size of the input along this dimension.
    output_limit: The size of the output along this dimension.
    padding: The padding applied to the input along this dimension.
  r   rA   )rF   rangeminrG   )rg   rh   ri   rj   rk   rl   rm   padding_lowpadding_highvalid_position_count
kernel_idxundilated_index_baseupper_limitlower_limit
output_idxundilated_indexlhs_spatial_indexs                    r*   -_get_spatial_valid_position_count_for_one_dimr{      s   ( 
+, \!
,
&

&
Q

aq
!%6
6


!
 Q
,
&
Q

1

q
 
)
)
,*
*,'   j A-1"4(:+GG
,
,
k /0kc+";Q?? L)  
 (
(()  o=> 
-=	=	Q	"3{"Ba#   B 
r=   dnumsrJ   rK   r;   window_strideslhs_dilationrhs_dilationc                 8   | j                   dd | j                  dd | j                  dd }
}	}d}t        t	        |            D ]S  }|t        ||   ||   ||   |j                  |	|      |j                  ||      |j                  |
|      ||         z  }U |S )a  Gets the number of valid spatial positions for conv_general_dilated.

  Args:
    dnums: The dimension numbers for the convolution.
    lhs: The shape of the left-hand side of the convolution.
    rhs: The shape of the right-hand side of the convolution.
    out: The shape of the output of the convolution.
    window_strides: The stride of the window along each spatial dimension.
    padding: The padding applied to the input along each spatial dimension.
    lhs_dilation: The dilation factor for the left-hand side along each spatial
      dimension.
    rhs_dilation: The dilation factor for the right-hand side along each spatial
      dimension.
  r   NrA   )rg   rh   ri   rj   rk   rl   rm   )lhs_specrhs_specout_specro   lenr{   rE   )r|   rJ   rK   r;   r}   rm   r~   r   input_spatial_dimskernel_spatial_dimsout_spatial_dimsvalid_position_countsds                r*   !_get_spatial_valid_position_countr   H  s    2 nnQRnnQRnnQR ,<)  '() 	aJ(+"1o$QYY2156II034YY/23
 	 
r=   batch_group_countc	           
      p   t        j                  | j                  |j                  |      }	t        |	| ||||||      }
| j                  |	j                  d      }|j                  |	j
                  d      }|j                  |	j                  d      }||z  }||z  |z  }||
z  }|t        z  }t        |      S )zCalculates roofline unfused flops for Jax's conv_general_dilated primitive.

  See `jax.lax.conv_general_dilated` for details on the arguments.
  r   rA   )	r   conv_dimension_numbersrE   r   r   r   r   r^   rG   )rJ   rK   r;   r}   rm   r~   r   rY   r   r|   spatial_valid_position_countsbatchnum_output_featuresnum_input_featuresnum_output_batchnon_spatial_dims_factor	fma_countr\   s                     r*   _calculate_conv_flopsr   w  s     
,
,	ii-% #DS#sNG\<# ))ENN1%
&%		%.."34yy!23.. ..1AA  &(EE)
'
'%	Ur=   c                   d | j                   D        \  }	}
t        j                  j                  | j                  d         }t        j
                  t        |	|
|||||||	      |	j                  j                  |	j                  z  |
j                  j                  |
j                  z  z   |j                  j                  |j                  z  z         S )zyRoofline for Jax's conv_general_dilated primitive.

  See `jax.lax.conv_general_dilated` for details on the arguments.
  c              3   Z   K   | ]#  }t         j                  j                  |       % y wr#   r$   r'   s     r*   r+   z1_conv_general_dilated_roofline.<locals>.<genexpr>  r@   r-   r   r.   )
r2   r   r%   r&   r3   r4   r   r6   r7   r5   )r   r}   rm   r~   r   rY   r   r8   r9   rJ   rK   r;   s               r*   _conv_general_dilated_roofliner     s      ON(#s((q)9:#		 	 )









 ))

sxx
'II)*II)*
 r=   axes.c                 &   | j                   sJ t        j                  |D cg c]  }| j                   j                  |    c}      }|dkD  ry t	        j
                  |D ci c]  }|d c}|D ci c]  }|d c}      S c c}w c c}w c c}w )NrA   r   	ici_bytesici_latency)meshrH   rI   rE   r   r4   )r   r   rP   	axes_sizes       r*   _return_zeros_if_one_sized_axisr     s     
/gg=sxx~~d+=>)]		 	 #'(4tQw(%)*Tq*
  > )*s    B"
B	2
B
T)	is_reducer   c          
      T   t        | |      x}r|S | j                  sJ | j                  j                  t        j                  j                  | j                        }|r)|t        j                  |D cg c]  }|   	 c}      z  }t        |fdd      }t        |      }	d}
||	z  }|D ]  }|   }|
||dz
  z  z  }
||z  } |d      |	z  }t        j                  |D ci c]  }|t        |
       c}|D ci c]  }|t        |       c}      S c c}w c c}w c c}w )Nc                     |    S r#    r:   r   s    r*   <lambda>z+_ring_collective_roofline.<locals>.<lambda>  s    47 r=   T)keyreverser   rA   r   )r   r   rE   r   r%   total_bytesr2   rH   rI   sortedr   r4   rG   )r   r   r   r8   r9   zeros_resultcurrent_shard_sizerP   sorted_axesnum_axesr   	axis_sizer   r   s                @r*   _ring_collective_roofliner     s=    5S$??\?	/	$--99#,,G"''$"?$4:"?@@ t!2DA+()! $dT
I#y1}55I)#$ [^$x/+		 	 0;<tS^#<4?@Ds;''@
 ' #@( =@s   5D
'D  D%
c                     t        |d| i|S )Nr   r   	axis_namer8   r9   s      r*   r   r     s    !:D!Wy!WTV!W r=   c                     t        || dd|S )NFr   r   r   r   s      r*   r   r     s    !:		U".0" r=   modeindices_sizeoutput_sizec                 V    | t         j                  j                  k(  r|dz  }|}||z   S y)z=Calculates roofline unfused flops for Jax's gather primitive.   r   )r   GatherScatterModeFILL_OR_DROP)r   r   r   index_check_flopsoutput_mask_flopss        r*   _calculate_gather_flopsr     s<     
W&&333 %q( $000	
r=   c                   d | j                   D        \  }}t        j                  j                  | j                  d         }|j
                  j                  |j                  z  dz  |j
                  j                  |j                  z  z   }t        j                  t        ||j                  |j                        |      S )Nc              3   Z   K   | ]#  }t         j                  j                  |       % y wr#   r$   r'   s     r*   r+   z#_gather_roofline.<locals>.<genexpr>%  s!     P4&&006Pr-   r   r   r.   )
r2   r   r%   r&   r3   r6   r7   r5   r4   r   )r   r   r8   r9   rc   indicesr;   r0   s           r*   _gather_roofliner     s     Q3<<P*!W((q)9:#
 
ii388#a''--*@*@7<<*OO  
	 	 +D',,I)
 r=   c                   
 d | j                   D        \  }}}|j                  d      
d}
rp
fd}
j                  D cg c]"  }t        j                  d|j
                        $ }} t        j                  |      | \  }}	|	j                  |j                  z  }t        j                  |d|j
                  j                  z  |j                  z  |j
                  j                  |j                  z  z         S c c}w )a  Roofline for Jax's `scatter*` primitives.

  The `scatter` functionality itself is a simple data read and write, which
  contributes 0 flops.

  But, the jaxpr for each `scatter*` function (aside from `jax.lax.scatter`)
  contains an `update_jaxpr` that gets applied to the operand & scattered
  updates (e.g. `add` for `scatter_add`, or arbitrary unary function for
  `scatter_apply`), which *does* contribute flops. This `update_jaxpr` gets
  applied to every element of the scattered updates.

  Thus,
  flops = [# flops for `update_jaxpr` per element] * [# elements in `updates`].

  To calculate # flops for `update_jaxpr`, we convert the `update_jaxpr` back to
  a callable, and then call `roofline` on that callable. `update_jaxpr` does not
  contain any information about input shapes or dtypes; it expects scalars. It
  will therefore give us a # flops-per-element result, which we multiply by
  the size of the updates to get the total flops.
  c              3   Z   K   | ]#  }t         j                  j                  |       % y wr#   r$   r'   s     r*   r+   z$_scatter_roofline.<locals>.<genexpr>M  s&      15h&&t,r-   update_jaxprr   c                  2    t        j                  g g|  S r#   )r	   
eval_jaxpr)inputsr   s    r*   r   z#_scatter_roofline.<locals>.<lambda>U  s    b J6 J r=   r      r.   )r2   getinvarsr   ShapeDtypeStructr6   r   r/   r5   r4   r7   )r   r8   r9   rc   r   updatesr\   	update_fndummy_inputsroofline_resultr   s             @r*   _scatter_roofliner   4  s    29<1gw ',
%JI :F9L9L45R/L  6**95|DA))GLL8E		 	  gmm$$
$w||
3MM""W\\12
 s   'C=c          
         | j                   D cg c]!  }t        j                  j                  |      # }}t	        | |D cg c]"  }t        j                  d|j                        $ c}      } t        | g||dd|S c c}w c c}w )N)rA   )r2   Fr   )	r2   r   r%   r&   r   r	   ShapedArrayr6   r   )r   r   r8   r9   r)   shapesrE   s          r*   _scalar_collective_roofliner   r  s|     @C||LtH"",,T2L&LPVWut//ekkBWX#	"3	P	PDE	PR	PP MWs   &B'B
c                    t        | g|d|i|}dt        t        t        f   dt        t        t        f   fd}t	        j
                   ||j                         ||j                              S )Nr   r   r    c                 \    | j                         D ci c]  \  }}||dz   c}}S c c}}w )Nr   )items)r   kvs      r*   double_dictz$_psum2_roofline.<locals>.double_dict  s(    !"+AAq1uH+++s   (r   )r   dictstrrG   r   r4   r   r   )r   r   r8   r9   ring_roofliner   s         r*   _psum2_roofliner     sq     ,CH$HTHRH-,T#s(^ ,S#X , 
	 	 -112M556
 r=   r   c          
      \   t        | |      x}r|S | j                  sJ | j                  j                  t        j                  j                  | j                        t        j                  |D cg c]  }|   	 c}      z  }t        |fd      d   }t        |      }|   |dz
  z  }	|   dkD  r|	dz  }	|dz  |	z  }
t        fd|D              }t        j                  |D ci c]  }|t        |
       c}|D ci c]  }|t        |       c}      S c c}w c c}w c c}w )Nc                     |    S r#   r   r   s    r*   r   z&_all_to_all_roofline.<locals>.<lambda>  s    $q' r=   )r   r   rA   r   c              3   .   K   | ]  }|   d z    yw)r   Nr   )r(   rP   r   s     r*   r+   z'_all_to_all_roofline.<locals>.<genexpr>  s     9tDJN9s   r   )r   r   rE   r   r%   r   r2   rH   rI   r   r   sumr4   rG   )r   r   r8   r9   r   rP   r5   smallest_axisr   bisection_bwr   r   r   s               @r*   _all_to_all_roofliner     s1    5S)DD\D	/	$				+	+CLL	9BGG$EDJE = 
$ (9:1=-^(m$A6,	-1AL Qh%) 9y99+		 	 09:tS^#:4=>Ds;''>
 #E$ ;>s   1D
+D$D)
permc          
      X   t        | |      x}r|S | j                  sJ | j                  j                  }|D cg c]  }|j                  |d       }}t        j
                  j                  | j                        }	t        t              }
d}|D ]$  \  }}||k(  rt        d t        j                  ||      D              }t        d t        j                  ||      D              }d}t        t        |            D ]  }||   }||   }||   }||k7  s||z
  |z  }||z
  |z  }||k  rdnd}|}||k7  r_t        j                   |||      }||z   |z  }t        j                   |||      }|
t        t#        ||g            xx   dz  cc<   |}||k7  r_t%        ||      }||z  } t'        ||      }' |	t'        |
j)                         d      z  }t	        j*                  |D ci c]  }|t-        |       c}|D ci c]  }|t-        |       c}      S c c}w c c}w c c}w )NrA   r   c              3   2   K   | ]  }t        |        y wr#   rG   r(   re   s     r*   r+   z%_ppermute_roofline.<locals>.<genexpr>       H!s1vH   c              3   2   K   | ]  }t        |        y wr#   r   r   s     r*   r+   z%_ppermute_roofline.<locals>.<genexpr>  r   r   )defaultr   )r   r   rE   r   r   r%   r   r2   r   floattuplerH   unravel_indexro   r   r
   tuple_updater   rp   rF   valuesr4   rG   )r   r   r   r8   r9   r   r   rP   	mesh_dims
shard_sizeici_contentionr   srcdst
src_coords
dst_coordsici_latency_for_permre   dim_sizesrc_posdst_posclockwise_distcounter_dist	directioncurr_poscurr_coordsnext_posnext_coordsdistancer   s                                 r*   _ppermute_roofliner    sK    5S)DD\D	/	$8AB$((4+B)B%%11#,,?*=H=O.+  9hc3
czHr'7'7Y'GHHJHr'7'7Y'GHHJ 3y>" )1h1g1g	G	!G+x7')X5'<7AR	'!))*aB+*h6())+q(C+
v{K&@AB
Cq
H
C( '! ~|4()), k#78KA 9D 3~446BB)		 	 09:tS^#:4=>Ds;''>
 S CT ;>s   H)H"H'
c                   d | j                   D        \  }t        j                  |D cg c]  }|j                  |    c}      }t	        t        t        |j                                    t	        |      z
  }t        j                  |D cg c]  }|j                  |    c}      }t        j                  t        |dz
  |z        t        |j                  j                  |j                  |z   z              S c c}w c c}w )Nc              3   Z   K   | ]#  }t         j                  j                  |       % y wr#   r$   r'   s     r*   r+   z)_reduce_sum_p_roofline.<locals>.<genexpr>  r,   r-   rA   r.   )r2   rH   rI   rE   setro   r   r   r4   rG   r6   r7   r5   )	r   r   r8   r9   r:   re   domain_size
other_axesresult_sizes	            r*   _reduce_sum_p_roofliner    s     
KS\\	J$1T223+5QWW&'#d)3*Z889+		 	  qK78 AGG,,0DEF
 	 38s   C<Dc                 P   d | j                   D        ^}}t        j                  j                  | j                  d         }t        j
                  |j                  |j                  j                  |j                  z  |j                  j                  |j                  z  z         S )Nc              3   Z   K   | ]#  }t         j                  j                  |       % y wr#   r$   r'   s     r*   r+   z'_select_n_p_roofline.<locals>.<genexpr>  s!     MX##--d3Mr-   r   r.   r1   )r   r8   r9   r:   rc   r;   s         r*   _select_n_p_roofliner    s~     NM'1q((q)9:#		 	 HH
''

QVV
#cii&8&8388&C
C
 r=   c                     | j                   }| j                  }t        j                  j	                  |      t        j                  j	                  |      z   }t        j
                  |      S N)r0   )r2   r3   r   r%   r   r4   )r   r8   r9   r2   r3   r]   s         r*   _callback_with_output_roofliner    s[     \\(mm)$$00((34)
 
	 	 9	==r=   c                     | j                   }t        j                  j                  |      }t        j                  |      S r  )r2   r   r%   r   r4   )r   r8   r9   r2   r]   s        r*   _debug_callback_roofliner  *  s8     \\( $$00:) 
	 	 9	==r=   )collectionsr   dataclassesr   	itertoolsrC   collections.abcr   numpyrH   jax._srcr   r   r   r	   r
   r   r   r   r   r   r   r   r   jax._src.laxr   r   r   r   r   r   r   lax_parallelr   r   r   jax.experimentalr   r^   chain__dict__r   sharding_constraint_pprim
isinstance	Primitiveregister_standard_rooflineRooflineRuleContextr4   r<   register_rooflineabs_pacos_pasin_patan_pcbrt_pceil_pconj_pcos_pcosh_pexp_pexpm1_pfloor_pimag_pinteger_pow_pis_finite_plog_plog1p_p
logistic_pneg_pnot_preal_pround_prsqrt_psign_psin_psinh_psqrt_psquare_ptan_pbessel_i0e_pbessel_i1e_p	digamma_p	erf_inv_perf_perfc_plgamma_ppvary_prO   add_psub_pmul_pdiv_prem_pand_por_pxor_pgt_plt_pge_ple_peq_pne_pmin_pmax_prG   rU   cummax_pcummin_p	cumprod_pcumsum_pcumlogsumexp_prX   dot_general_pDotDimensionNumbersrf   r   r{   ConvDimensionNumbersr%   r   "ConvGeneralDilatedDimensionNumbersr   conv_general_dilated_pr   r   r   boolr   reduce_scatter_pall_gather_pr   r   gather_pr   r   scatter_add_pscatter_max_pscatter_min_pscatter_mul_pscatter_sub_p	scatter_pr   pmin_ppmax_ppsum_invariant_pr   all_to_all_pr   
ppermute_pr  reduce_sum_pr  
select_n_pr  pure_callback_pio_callback_pr  debug_callback_pr  r   r=   r*   <module>r     s   $   $   "             &  BHH!	,,
 
,,,,//,,--//		%%'% .D( dnn%'H''-+.0		%	% 	 &   399 %&7 8 &   3:: &'8 9 &   3:: &'8 9 &   3:: &'8 9 &   3:: &'8 9 &   3:: &'8 9 &   3:: &'8 9 %   399 %&7 8 &   3:: &'8 9 %   399 %&7 8 '   3;; '(9 : '   3;; '(9 : &   3:: &'8 9 -   3,, -.? @ +   3?? +,= > %   399 %&7 8 '   3;; '(9 : *   3>> *+< = %   399 %&7 8 %   399 %&7 8 &   3:: &'8 9 '   3;; '(9 : '   3;; '(9 : &   3:: &'8 9 %   399 %&7 8 &   3:: &'8 9 &   3:: &'8 9 (   3<< (): ; %   399 %&7 8 0   7// 01B C 0   7// 01B C -   7,, -.? @ -   7,, -.? @ )   7== )*; < *   7>> *+< = ,   7++ ,-> ? # # #DLL 1		%	% 	( &   399 %&8 9 %   399 %&8 9 %   399 %&8 9 %   399 %&8 9 %   399 %&8 9 %   399 %&8 9 $   388 $%7 8 %   399 %&8 9 $   388 $%7 8 $   388 $%7 8 $   388 $%7 8 $   388 $%7 8 $   388 $%7 8 $   388 $%7 8 %   399 %&8 9 %   399 %&8 9		%	% 
   2   <00 12H I 1   <00 12H I 2   <11 23I J 1   <00 12H IL778		%	% 
  9. C--.## ,,
  /BSSS S 	S
 S S 38_S 	Sl,++,			, 
		, 
			,
 SM, eCHo&, 3-, 3-, 	,^"			"			" 
		" SM	"
 eCHo&" 3-" 3-" #EE" " 	"J K>>?#		%	%# SM# eCHo&	#
 3-# 3-# #EE# # # @#L
##
+0c?
t#
" 	$##$ 	c3h$ 	$ $N :   <88 9W 6   <44 5

#
#  		: G,,-		%	% 
#
#
  .*3		%	%3 	3l 2   700 12C D 1   700 12C D 1   700 12C D 1   700 12C D 1   700 12C D -   7,, -.? @Q		%	%Q S/Q
 Q 0   <.. /0K L /   <.. /0K L L99:## 	c3h
  ;" L556## 38_
  7D L3348##8 38_8 	eCHos"#	8 8 58v C,,-		%	% S/
  .( CNN+		%	% 	 ,  H445H223>		%	%> 	> 4 6>  I667>		%	%> 	> 8>r=   