
    uki-.                       U d dl mZ d dlmZmZ d dlmZmZ d dlm	Z	m
Z
 d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZm Z  d dl!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' d dl(m)Z)m*Z* eZ+eZ,ejZ                  ej\                  z  Z/ej`                  Z1 eddd       G d d             Z2 eddd       G d d             Z3 eddd       G d d             Z4 G d de      Z5i Z6de7d<   d d d!	 	 	 	 	 	 	 	 	 	 	 d*d"Z8d+d#Z9	 	 	 d,d d d d d$	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d-d%Z:d.d&Z;d.d'Z<d d d d(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d/d)Z=y)0    )annotations)	dataclassfield)AnyProtocol)CallableSequenceN)logging)NamedSharding)api)core)prng)source_info_util)traceback_util)util)
make_jaxpr)	dce_jaxpr)AbstractMeshMesh)broadcast_prefixtree_flattentree_unflattentree_map)foreach)	shard_mapshard_map_pT)frozenslotskw_onlyc                  ^    e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   y)RooflineRuleContextzsource_info_util.NameStack
name_stackcore.Primitive	primitiveSequence[core.AbstractValue]avals_in	avals_outzcore.JaxprEqnContextjaxpr_eqn_ctxMesh | AbstractMesh | Nonemeshboolpin_lhs_in_vmempin_rhs_in_vmemN)__name__
__module____qualname____annotations__     ]/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/experimental/roofline/roofline.pyr!   r!   -   s2    (((())%%""r3   r!   c                  j    e Zd ZU ded<   ded<   ed
d       Zedd       Zedd       Zedd       Z	y	)RooflineShapeztuple[int, ...]shapeValidRooflineDtypedtypec                   t        |t        j                        st        dt	        |       d      t        |j
                  t              s"t        dt	        |j
                         d       | |j                  |j
                        S )NzExpected ShapedArray, got .z(Expected numpy or prng.KeyTy dtype, got )r7   r9   )
isinstancer   ShapedArray	TypeErrortyper9   r8   r7   )clsavals     r4   	from_avalzRooflineShape.from_aval>   st    dD,,-24:,a@AAdjj"454T$**5E4Fa
H  TZZtzz22r3   c                R    t        t        j                  | j                              S N)intnpprodr7   selfs    r4   sizezRooflineShape.sizeH   s    rwwtzz"##r3   c                Z    t        | j                  | j                  j                  z        S rD   )rE   rJ   r9   itemsizerH   s    r4   byteszRooflineShape.bytesL   s     tyy4::...//r3   c                ,     t         fd|D              S )Nc              3  T   K   | ]  }j                  |      j                   ! y wrD   )rB   rM   ).0rA   r@   s     r4   	<genexpr>z,RooflineShape.total_bytes.<locals>.<genexpr>R   s      ;Ts}}T"((;s   %(sum)r@   avalss   ` r4   total_byteszRooflineShape.total_bytesP   s    ;U;;;r3   N)rA   core.AbstractValuereturnr6   )rW   rE   )rT   r%   rW   rE   )
r.   r/   r0   r1   classmethodrB   propertyrJ   rM   rU   r2   r3   r4   r6   r6   9   s\    		3 3 $ $ 0 0 < <r3   r6   c                      e Zd ZU dZded<   dZded<    ee      Zded<    ee      Z	ded<   dZ
ded	<   dZded
<   dZded<   edd       ZddZddZddZy)RooflineResultr   rE   flopsunfused_flops)default_factorydict[str, int]	ici_bytesici_latency	hbm_bytespeak_hbm_bytesunfused_hbm_bytesc                     |        S rD   r2   )r@   s    r4   zeroszRooflineResult.zeros_   s	    5Lr3   c                   dd}t        | j                  |j                  z   | j                  |j                  z    || j                  |j                         || j                  |j                        | j
                  |j
                  z   t        | j                  |j                        | j                  |j                  z         S )Nc           	         t        |       t        |      z  D ci c](  }|| j                  |d      |j                  |d      z   * c}S c c}w )Nr   )setget)d1d2ks      r4   merge_ici_dictsz/RooflineResult.__add__.<locals>.merge_ici_dictsd   sA    69"gB6GHa1q!,,HHHs   -A
r\   r]   r`   ra   rb   rc   rd   )rk   r_   rl   r_   rW   r_   )	r[   r\   r]   r`   ra   rb   maxrc   rd   )rI   otherrn   s      r4   __add__zRooflineResult.__add__c   s    I jj5;;&((5+>+>>!$..%//B#D$4$4e6G6GH..5??24..0D0DE0053J3JJ r3   c                   t        t        | j                  |z        t        | j                  |z        | j                  j                         D ci c]  \  }}|t        ||z         c}}| j                  j                         D ci c]  \  }}|t        ||z         c}}t        | j                  |z        t        | j                  |z        t        | j                  |z              S c c}}w c c}}w )Nro   )
r[   rE   r\   r]   r`   itemsra   rb   rc   rd   )rI   constantrm   vs       r4   __mul__zRooflineResult.__mul__q   s    $**x'($,,x7848NN4H4H4JKDAq1c!h,''K6:6F6F6L6L6NOdaQAL))Odnnx/04..9:d44x?@  LOs   C1
C7c                $    | j                  |      S rD   )rw   )rI   ru   s     r4   __rmul__zRooflineResult.__rmul__|   s    <<!!r3   N)rW   r[   )rq   r[   rW   r[   )ru   zint | floatrW   r[   )r.   r/   r0   r\   r1   r]   r   dictr`   ra   rb   rc   rd   rX   rf   rr   rw   ry   r2   r3   r4   r[   r[   U   ss    %.-#D9)^9 %d ;+~;)S.#S 	"r3   r[   c                       e Zd Z	 	 	 	 	 	 ddZy)_RooflineRulec                     y rD   r2   )rI   ctxargskws       r4   __call__z_RooflineRule.__call__   s    r3   N)r~   r!   r   r6   rW   r[   )r.   r/   r0   r   r2   r3   r4   r|   r|      s    "+8r3   r|   z#dict[core.Primitive, _RooflineRule]
_rooflinesFr,   r-   c                  t        j                  t        j                  d|             }t        j                         }i dfd}dfd}dddd}	t        |t        j                        r|j                  n|}fd}
t        ||j                  t        |
|j                               t        ||j                  t        |
|j                               t        j                  |      } |	t        j!                                     }|}|j"                  D ]  }|j$                  j'                  ||j$                  j(                  z         }t        j*                  |j$                  j,                  |j(                        5  d|j.                  v rJ|t1        t        j                  |j2                  j4                  |       |j.                  d   |||	      z  }n^d
|j.                  v rJ|t1        t        j                  |j2                  j4                  |       |j.                  d
   |||	      z  }n|j2                  t6        vr]d|j2                   d}t9        |      D ])  }|j;                  d      r|d| dt=        ||       z  }+ t?        j@                  |       nt6        |j2                     }| |tC        |j(                  |j2                  t        |j                        t        |jD                        |jF                  |||      gt        ||j                        i |j.                  z  }t        |
|jD                        }| |	|      z  }t        ||jD                  |       |j                  D cg c](  }t        |t        jH                        s||   |u r|   * }}| |	|      z  }t        jJ                  ||       tM        ||      }d d d         |t	        |      z  }|S c c}w # 1 sw Y   xY w)Nrooflinec                    |J || <   y rD   r2   )rv   nodeenvs     r4   writez$_roofline_interpreter.<locals>.write   s    CFr3   c                    t        |       t        j                  u r2t        j	                  t        j
                  | j                              S t        | t        j                        sJ |    S rD   )	r?   r   Literalr6   rB   abstractifyvalr<   Var)rv   r   s    r4   readz#_roofline_interpreter.<locals>.read   sP    Aw$,,$$T%5%5aee%<==488$$$Vmr3   c                    t        |       t        j                  u rt        j                  | j                        S | j
                  S rD   )r?   r   r   r   r   rA   )rv   s    r4   rA   z#_roofline_interpreter.<locals>.aval   s0    Aw$,,aee$$VVmr3   c                &    t        d | D              S )Nc              3  4   K   | ]  }|j                     y wrD   )rM   )rP   r7   s     r4   rQ   z;_roofline_interpreter.<locals>.sum_bytes.<locals>.<genexpr>   s     /uu{{/s   rR   )shapess    r4   	sum_bytesz(_roofline_interpreter.<locals>.sum_bytes   s    ////r3   c                :    t         j                   |             S rD   )r6   rB   )xrA   s    r4   <lambda>z'_roofline_interpreter.<locals>.<lambda>   s    -"9"9$q'"B r3   )r"   jaxprr   
call_jaxprzNo roofline rule for z, skipping..._
z: )r"   r$   r&   r'   r(   r*   r,   r-   )rc   )rv   zcore.Varr   r6   )rv   	core.AtomrW   r6   )rv   r   rW   rV   )r   zSequence[RooflineShape]rW   rE   )'r   new_name_stackr   	wrap_namer[   rf   r<   r   ClosedJaxprr   r   	constvarsmapinvars	last_usedlistvalueseqnssource_inforeplacer"   user_context	tracebackparams_roofline_interpreterr$   namer   dir
startswithgetattrr
   warningr!   outvarsr~   r   clean_up_dead_varsrp   )f_namer   r*   r,   r-   r"   resultr   r   r   make_roofline_shaper   current_hbm_bytesrc   eqnr   msgattrruleoutvar_shapesrv   removed_shapesrA   r   s                         @@r4   r   r      s     ..t~~j&/QR*!&')#0 $E4+;+;<%++%%B			OOU__-
 
%s#6EFnnU#)SZZ\ 23$.ZZ =>c//))coo888 * K 
	&	&	ooK,B,B
 9> 
CJJ	'
..++V
4
**W

))
 	
 3::% 	'
..++V
4
**\
"
))
 	
 ==
*%cmm_MBH 	5D%RvRT 2344C	5 	#--($
"--mmszz*$,''++	
 tSZZ 
 JJ
 	
  -s{{;m9]33eS[[-0 **At||,11D a&n  9^44
c3	2>+<=ns9> 9>	=>~ 	N.99&	-e9> 9>s&   C2P:D
P-O=10P=PP	c                B     t        j                          fd       }|S )Nc                 v    t        j                  g|  \  }} |t        t        j                  |            S rD   )r   vjpr   jnpbfloat16)r   primalsf_vjpfs      r4   wrappedz_f_with_vjp.<locals>.wrapped   s0    WWQ&&NGU#,,011r3   )r   wraps)r   r   s   ` r4   _f_with_vjpr      s$    ::a=2 2 
.r3   r,   r-   r   print_jaxprc          
          t        j                         t        j                   fd              }|S )Nc                    }t        |      }rt        |      } t        |d      |  \  }}	 	 	 	 	 	 dfd}53t        |      }t	        |      \  }}t        |||      }t        ||      }dt        |j                  j                        z  }t        |j                  |      \  }}	|j                  D 
cg c]  }
|
j                  t        k(  s|
 }}
|r	 |d   j                  d   }rt#        |       |t%        t'        j(                        |
      fS c c}
w # t        $ r t!        d| d	      w xY w)N)r*   in_specs	out_specsT)return_shapec                p    t        j                  | j                  | j                  t	        |            S N)sharding)r   ShapeDtypeStructr7   r9   r   )r7   out_specr*   s     r4   make_sharded_shape_dtype_structzBroofline.<locals>.wrapped.<locals>.make_sharded_shape_dtype_struct  s.     !!U[[=x+H r3   )Tr   zMissing shard_map jaxpr in r;   r   )r7   api.ShapeDtypeStructr   SpecsrW   r   )r   r   r   r   r   r   r   lenr   r   r   r   r$   r   r   KeyError
ValueErrorprintr   r   fun_qual_name)r   	wrapped_fr   
out_shapesr   out_specs_flatflat_out_shapestreedefused_outputsr   eshard_map_eqnsr   r   r*   r   r,   r-   r   r   s               r4   r   zroofline.<locals>.wrapped  s    I	 5$:JID8&/1i
i(i@
94@$GE:!-2	 !1'	:>n!-j!9ow
)?No "'?;jS!4!455Ll3HE1::!;N  Ar"))'2 El,1''    A6ugQ?@@As   D+D+'D0 0E	r   r   r   api_boundary)	r   r*   r   r   r,   r-   r   r   r   s	   ```````` r4   r   r     s:     ::a=- -  -^ 
.r3   c                     d fd}|S )Nc                    | t         <   | S rD   r   )r   prims    r4   registerz#register_roofline.<locals>.registerD  s    JtKr3   )r   r|   r2   )r   r   s   ` r4   register_roofliner   C  s     
/r3   c                    dd}|t         | <   y )Nc                *    t         j                         S rD   )r[   rf   )r~   r   kwargss      r4   standard_rulez1register_standard_roofline.<locals>.standard_ruleL  s    !!r3   )r~   r!   r   )r   r   s     r4   register_standard_roofliner   K  s    " #*Tr3   r,   r-   r   c          	          t        j                         t        j                   fd              }|S )Nc                      t        	      |  \  }}|| t        d	      t        d |        d   fS )Nr   Tr   c                    t        j                  | j                  | j                  t        j
                  k(  rt        j
                  nt        j                  | j                        S r   )r   r   r7   r9   r   int32r   r   )r   s    r4   r   z4roofline_and_grad.<locals>.wrapped.<locals>.<lambda>w  s;    C((GGCII-CII3<<ZZ r3      )r   r   )
r   primal_shapes
fwd_resultr   r   r*   r   r,   r-   r   s
      r4   r   z"roofline_and_grad.<locals>.wrapped\  s    !
%%! !M: 	h	''	 

 

$ 	
% r3   r   )r   r*   r   r   r,   r-   r   r   s   ``````` r4   roofline_and_gradr   R  s:     ::a=! !  !F 
.r3   )r   strr   z
core.Jaxprr*   Mesh | AbstractMeshr,   r+   r-   r+   rW   r[   )r   r   )NNN)r   r   r*   r)   r   Specs | Noner   r   r,   r+   r-   r+   r   r+   r   r+   rW   z:Callable[..., tuple[ShapeDtypeStructTree, RooflineResult]])r   r#   )r   r   r*   r   r   r   r   r   r,   r+   r-   r+   r   r+   rW   zJCallable[..., tuple[ShapeDtypeStructTree, RooflineResult, RooflineResult]])>
__future__r   dataclassesr   r   typingr   r   collections.abcr   r	   numpyrF   abslr
   	jax.numpyr   jax.shardingr   jax._srcr   r   r   r   r   r   jax._src.apir   "jax._src.interpreters.partial_evalr   jax._src.meshr   r   jax._src.tree_utilr   r   r   r   jax._src.utilr   jax._src.shard_mapr   r   ShapeDtypeStructTreer   r9   KeyTyr8   safe_mapr   r!   r6   r[   r|   r   r1   r   r   r   r   r   r   r2   r3   r4   <module>r     s#   # (   .    &    % #  # 8 , W W ! 5  XX

* 
mm $dD1  2 $dD1< < 2<6 $dD1'" '" 2'"TH  35
/ 4  o
o	o 	o
 o o od (,!"	< "!<<
$< < 	< < < 
< < @<~#  /// / 	/ / / / P/r3   