
    ukil}                    R   d dl mZ d dlmZ d dlZd dlmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlZd dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dl m!Z! d dl"Z# ejH                  d ejJ                  dd      d      Z& ejN                  d ejP                  dd      d      Z)dZ* ejV                  e,       ejZ                  Z- ej\                  e/      Z0d1dZ1d  Z2d2d!Z3d3d"Z4d3d#Z5	 	 	 	 	 	 	 	 	 d4	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d5d$Z6ejn                  	 	 	 	 	 	 	 	 	 	 d6d%       Z8ejn                  	 	 	 	 	 	 	 	 	 	 	 	 d7d&       Z9g Z:	 	 d8d'Z;	 d9	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d:d(Z<	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d;d)Z=	 d9	 	 	 	 	 	 	 	 	 	 	 d<d*Z>	 	 	 	 	 	 	 	 	 	 	 	 d=d+Z?i e?_@        	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d>d,ZAi eA_B        	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d?d-ZCd@d.ZD	 	 	 	 	 	 	 	 	 	 	 	 dAd/ZE	 	 	 	 	 	 	 	 	 	 	 	 dBd0ZFy)C    )annotations)SequenceN)partial)Any)Callable)	cache_key)compilation_cache)config)distributed)lib)
monitoring)path)profiler)traceback_util)util)mlir)
xla_client)_jax)irjax_disable_most_optimizationsJAX_DISABLE_MOST_OPTIMIZATIONSFzTry not to do much optimization work. This can be useful if the cost of optimization is greater than that of running a less-optimized program.%jax_compiler_detailed_logging_min_ops%JAX_COMPILER_DETAILED_LOGGING_MIN_OPS
   zHow big should a module be in MLIR operations before JAX enables detailed compiler logging? The intent of this flag is to suppress detailed logging for small/uninteresting computations.)helpc                     ~ y)Nr    )backends    L/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/_src/compiler.pyget_latest_profile_versionr!   K   s    	    c                    |dz  }|dk  r|S | j                   D ])  }|D ]"  }|D ]  }t        ||      }|dk  s|c c c S  $ + |S )N   r   )regions_walk_operations)opkregionblockchild_ops        r    r&   r&   P   sg    q&!UH

 f  (Xq)q5( 
(r"   c                T    t         j                  }t        | j                  |      dk  S )zBReturns 'true' if detailed logging should be enabled for 'module'.r   )"_COMPILER_DETAILED_LOGGING_MIN_OPSvaluer&   	operation)modulebounds     r    use_detailed_loggingr2   ]   s%    
,
2
2%	&**E	2Q	66r"   c                    t         j                  j                  rt        j                  nt        j
                  }t        j                  |d| |       y )Nz5Persistent compilation cache hit for '%s' with key %r)r
   log_compilesr.   loggingWARNINGDEBUGloggerlog)module_namer   hit_log_prioritys      r    log_persistent_cache_hitr<   c   s:    )/)<)<)B)Bgoo"== **V)%r"   c                    t         j                  j                  r$t        j                         rt
        j                  nt
        j                  }t        j                  |d| |       y )Nz6PERSISTENT COMPILATION CACHE MISS for '%s' with key %r)
r
   explain_cache_missesr.   r	   is_persistent_cache_enabledr5   r6   r7   r8   r9   )r:   r   miss_log_prioritys      r    log_persistent_cache_missrA   j   sM    !66<<-IIK  %]] 
 	** X)%r"   c                V
   t        j                         }| |_        ||_        |j                  }||_        ||_        t        j                  j                  |_        |||_
        |r|xs g |_        |xs g |_        |.t        j                  d| ||       t        j                   |      }|j"                  dk(  r|dk(  r	|dddf   }| |j$                  d   k7  rd}t'        |j)                  ||             ||j$                  d   k7  rd}t'        |j)                  ||            |j*                  t,        k(  r# t        j.                  d t0        g      |      }t         j2                  j5                  |      }|j7                         | k(  sJ |j9                         |k(  sJ ||_        t        j<                  j                  |_        t        j>                  j                  |_        t        j@                  t        jB                  j                        j                  |_!        t        j@                  t        jD                  j                        j                  |_"        |hd	d
g}|jG                  ddg       tI        |      }|D ]#  }||v stK        |||jM                  |             % tO        |jQ                               |_)        |j                  jT                  }tV        jX                  tV        jX                  |_-        t\        j                  rd|_/        d|_0        d|_1        t        jd                  j                  sd|_3        t        jh                  j                  }|dkD  r||_5        t        j                  d|       nftl        |_5        |
t        jo                  d       nCtq        |
      }|dk7  r||_5        t        j                  d|       nt        js                  d       |	|_:        tw        jx                         rt{        j|                  t        j~                  j                  xs d      }t        j                  j                  xs d}|dk(  sd|v r2|dz  }t        |      |_B        d|_C        t        j                  d|       |dk(  sd|v r|dz  }t        |      |_D        t        j                  d|       t        j                  j                  dk(  r!t         j                  j                  |_J        |S t         j                  j                  |_J        |S )a  Returns the compile options to use, as derived from flag values.

  Args:
    num_replicas: Number of replicas for which to compile.
    num_partitions: Number of partitions for which to compile.
    device_assignment: Optional ndarray of jax devices indicating the assignment
      of logical replicas to physical devices (default inherited from
      xla_client.CompileOptions). Must be consistent with `num_replicas` and
      `num_partitions`.
    use_spmd_partitioning: boolean indicating whether to enable SPMD or MPMD
      partitioning in XLA.
    use_auto_spmd_partitioning: boolean indicating whether to automatically
      generate XLA shardings for SPMD partitioner.
    auto_spmd_partitioning_mesh_shape: device mesh shape used to create
      auto_spmd_partitioning search space.
    auto_spmd_partitioning_mesh_ids: device ids used to create
      auto_spmd_partitioning search space.
    env_options_overrides: dict of additional options parsed by the compiler
    fdo_profile: Optional profile for feedback-directed optimization passed to
      XLA.
    detailed_logging: Is this an "interesting" computation about which XLA would
      be wise to log compilation information?
    backend: the client, if available.
  NzKget_compile_options: num_replicas=%s num_partitions=%s device_assignment=%sr$   r   z8device_assignment does not match num_replicas: {} vs {}.z:device_assignment does not match num_partitions: {} vs {}.c                    | j                   S Nid)ds    r    <lambda>z%get_compile_options.<locals>.<lambda>   s
     r"   )otypesexec_time_optimization_effortmemory_fitting_effortoptimization_levelmemory_fitting_levelTFrematerializationzSget_compile_options XLA-AutoFDO profile: using JAX XLA profile version %d from flagzGget_compile_options: no backend supplied; disabling XLA-AutoFDO profilezMget_compile_options XLA-AutoFDO profile: using XLA-AutoFDO profile version %dzaget_compile_options XLA-AutoFDO profile: XLA-AutoFDO profile version is 0; this should not happen allxla_gpu_kernel_cache_filez!Enabling XLA kernel cache at '%s'%xla_gpu_per_fusion_autotune_cache_dirz%Enabling XLA autotuning cache at '%s')LxcCompileOptionsnum_replicasnum_partitionsexecutable_build_optionsuse_spmd_partitioninguse_auto_spmd_partitioningr
   use_shardy_partitionerr.   fdo_profile!auto_spmd_partitioning_mesh_shapeauto_spmd_partitioning_mesh_idsr8   debugnparrayndimshape
ValueErrorformatdtypeobject	vectorizeintDeviceAssignmentcreatereplica_countcomputation_countdevice_assignmentrJ   rK   EffortLevelrL   rM   extenddictsetattrpoplistitemsenv_option_overridesdebug_optionsr   	cuda_pathxla_gpu_cuda_data_dir_DISABLE_MOST_OPTIMIZATIONSxla_backend_optimization_level!xla_llvm_disable_expensive_passesxla_test_all_input_layoutsenable_remat_opt_passxla_disable_hlo_passesjax_xla_profile_versionprofile_version_NO_PROFILE_DONT_RETRIEVEinfor!   errorxla_detailed_loggingr	   r?   pathlibPathcompilation_cache_dir"persistent_cache_enable_xla_cachesstrrQ   2xla_gpu_enable_llvm_module_compilation_parallelismrR   r   global_state
process_idAutotuneCacheModeUPDATE(xla_gpu_experimental_autotune_cache_modeREAD)rU   rV   rm   rX   rY   r\   r]   env_options_overridesr[   detailed_loggingr   compile_optionsbuild_optionsmsgoverrides_on_build_optionsnamerv   r   fdo_profile_versionr   enabled_flagskernel_cache_pathautotune_cache_paths                          r    get_compile_optionsr   t   s   J %%'/!-/#1/ !::-(=-%-G-*)/)F)F)L)L-& +M6W6][]M34S4YWYM1"
LLUn&79 !23 	!#.A*=+AtG4(..q11Fcszz"3\BCC*0033Hcszz"3^DEE&(D",,~seD
++223DE**,<<<..0NBBB(9O%060T0T0Z0Z--(.(D(D(J(J-%%+%7%7%%&	E " (.'9'9!!''(	E $ & 	()@"B%%	56 !!67* F	&	&t%:%>%>t%DEF ,00E0K0K0M+NO(!::HH-]]*---M' &&34M06:M3/4M,		%	%	+	++>M( #::@@q &=O#
LL >(* '@O#kk 3 4 7w?		!*=' <(	* 	 P 	Q (8-$ 224 <<44::@bAD==CCIrM!<!M!<<034E0Fm-IMmFll68IJ!HM!Y #JJ<?@S<Tm9ll:<OP 
	!	!	,	,	1ACAUAUA\A\> 
 BDAUAUAZAZ>	r"   c                   |j                   j                  d   }t        j                  |      j                  }|j
                  j                  St        |j
                  j                        r4t        j                  d|t        |j
                  j                               	 | j                  |||      S # t        j                  $ r}t        D ]  } ||      }||| |d }~ww xY w)Nsym_name1Compiling module %s with FDO profile of length %d)r/   
attributesr   
StringAttrr.   rW   r[   lenr8   r^   compiler   JaxRuntimeError_XLA_RUNTIME_ERROR_HANDLERS)	r   r0   executable_devicesoptionsr   r:   eerror_handlerhandler_results	            r    backend_compiler     s     ((4(h'--+&&22>
g..::
;
LL;G,,889??6#5w??			 4 $$Q'n		#!#$ Gs   #B6 6C(	C#C##C(c                   |j                   j                  d   }t        j                  |      j                  }|j
                  j                  St        |j
                  j                        r4t        j                  d|t        |j
                  j                               	 t        | t        j                        r+|r| j                  ||||      S | j                  |||      S |r| j                  ||||      S | j                  |||      S # t        j                  $ r}t         D ]  } ||      }	|	|	| |d }~ww xY w)Nr   r   )r   r   host_callbacks)r   r   )r/   r   r   r   r.   rW   r[   r   r8   r^   
isinstancer   CompileOnlyPyClientr   compile_and_loadr   r   )
r   r0   r   r   r   r   r:   r   r   r   s
             r    backend_compile_and_loadr   8  si    ((4(h'--+&&22>
g..::
;
LL;G,,889) '4334	1#)	  
 	
 __
/!    
''1#)	 ( 
 	
 %%
/! &  
 
		 4 $$Q'n		#!#$ Gs0   #0D D (D ?D E&E :E  Ec                .    t         j                  |        y)a  Registers a custom exception handler for XLA runtime errors.

  Registering a custom handler allows re-raising a more informative exception
  after encountering an XLARuntimeError.

  Args:
    handler_fn: A function which returns a new exception to replace the original
      XLA runtime error, or None if the original error should be propagated.

  Returns:
    A new exception or None.
  N)r   append)
handler_fns    r    "register_xla_runtime_error_handlerr   z  s     $$Z0r"   c                   |j                   j                  d   }t        j                  |      j                  }t        j                  |d      x}	rt        j                  d|	       t        |j                         D 
ch c]  }
|
j                   c}
      dkD  }t        |j                         d       j                  }t        |||| ||||      \  }}|t        | ||||      S t        j                   d       t#        j$                         }t'        |||| |      \  }}t#        j$                         |z
  }|V|J t)        ||       t        j                   d       t        j*                  d	||z
         t        j*                  d
|       |S t-        j.                  d       t0        j2                  j                  ra|r_t4        j6                  j8                  Et        |      dk(  r7t;        ||       t=        | ||||t4        j6                  j8                  |||	      S t;        ||       t?        | ||||||      S c c}
w )Nr   r   zDumped the module to %s.r$   c                    | j                   S rD   rE   )devices    r    rH   z'compile_or_get_cached.<locals>.<lambda>  s
    FII r"   )keyz1/jax/compilation_cache/compile_requests_use_cachez!/jax/compilation_cache/cache_hitsz-/jax/compilation_cache/compile_time_saved_secz//jax/compilation_cache/cache_retrieval_time_sec)compile_after_persistent_compilation_missr   ) r/   r   r   r   r.   r   dump_module_to_filer8   r   r   flattenprocess_indexmin_resolve_compilation_strategyr   r   record_eventtime	monotonic_cache_readr<   record_event_duration_secsr   
test_eventr
   share_binary_between_hostsr   r   clientrA   _compile_and_share_module_compile_and_write_cache)r   computationdevicesr   r   r   pgle_profilerr   r:   	dumped_tor   is_multi_processmin_device_process_idr   cache_retrieval_startretrieved_executableretrieved_compile_timecache_retrieval_times                     r    compile_or_get_cachedr     sS    ""--j9(h'--+**;	BBYB
KK*I6 
goo.?
@F6
@AAE  oo5M 
  =	 )_ #0/  MN..*1<9ow8J2L..),AA%!---[)4?@))7!557 ))9;OQ  //=>''--


"
"
)
)
5 n

"k95$  ''
 
 k95# E As   >H>c                   t         j                  j                  xr t         j                  j                  dkD  }t	        t
        || |      }	|st         j                  j                  r6 |	|d      }
t        j                  |      }|xj                  dgz  c_	        nd }
|} |	|      }||
t        ||
      rIt         j                  j                  rt        j                  d| d       ||j                          |
|fS t         j                  j                  r%t        ||      rt        j                  d| d	       |r|j                   j"                  vt%        |j                   j"                        rW|rQt&        j(                  j*                  7t-        | |||t&        j(                  j*                  |      |j                   _        |
|fS ||fS )
Nr   )r   r   r   s   pgle profiled)override_fdo_profile)xla_gpu_enable_command_bufferrO   zPGLE-optimized z loaded from compilation cachez)PERSISTENT CACHE MISS for PGLE-optimized z^ despite non-PGLE hit; it may not have been executed enough times when the cache was populated)r
   enable_pgler.   pgle_profiling_runsr   _get_cache_keycompilation_cache_expect_pglecopydeepcopyru   _is_executable_in_cacher8   r   disablewarningswarnrW   r[   r   r   r   r   _share_fdo_profiles)r   r   r   r   r   r   r:   r   is_auto_pgle_usedget_cache_keypgle_optimized_cache_keyfirst_pass_compile_optionsr   s                r    r   r     s    G6#=#=#C#Ca#G  .'&17D- &>>DD,_BR T "&!?33+8 3  $!0 67)7Cw(@A		-	-	3	3ok]2PQR		"%66

.
.
4
4%gy9 mm?} M@ @ A 

2
2
>
>
J
o66BB
C K44;;G$$++!	
 ..: $_44 000r"   c                   t        j                  |      sy t        j                  j                  rt
        j                  j                  }nt
        j                  j                  }|&t        j                  |       } || j                  _        	 t        j                  ||| ||      S # t        j                  $ r }t         j#                  d|       Y d }~y d }~ww xY w)NzKcompile_or_get_cached: unable to generate cache key, skipping the cache: %s)r	   is_cache_usedr
   -remove_custom_partitioning_ptr_from_cache_keyr.   cache_key_typeIgnoreCallbacksCUSTOM_PARTITIONINGNOr   r   rW   r[   r   r   r   r8   r   )r   r   r   r   r   ignore_callbacksexs          r    r   r   N  s     
	(	(	199??%55II%5588%mmG$G3GG$$0
0**  
		 0
LL +,.0 0	0s   B( (C;CCc                B   | j                   j                  d   }t        j                  |      j                  }|j
                  j                  }t        |      dk(  r|S d|j
                  _        	 t        j                  | |||t        j                  j                        dz   }	|	t"        j$                  v rt"        j$                  |	   S t&        j(                  j                  }t*        j,                  j.                  |k(  r*t        j1                  d||       |j3                  |	|       n)t        j1                  d||       |j5                  |	|      }|t"        j$                  |	<   |S # t        j                  $ r"}
t        j!                  d|
       |cY d }
~
S d }
~
ww xY w)Nr   r   r"   	_fdo_synczYcompile_or_get_cached: unable to generate cache key, skipping the fdo profile sharing: %sz+Module %s. Sharing FDO profile. Process %d.zEModule %s. Waiting for FDO profile which should be set by process %d.)r/   r   r   r   r.   rW   r[   r   r	   r   r   r   ALLr   r   r8   r   r   modules_profilesr
   %share_binary_between_hosts_timeout_msr   r   r   r^   key_value_set_bytesblocking_key_value_get_bytes)r   r   r   r   global_clientmin_process_idr   r:   r[   profile_keyr   share_timeouts               r    r   r   l  s    ""--j9(h'--+88DD+9</**6''**..	
 	 $ '888//<<>>DD-((N:
LL5
 %%k;?
LLO
  <<]K 7B&&{3	= 
		 
LL	/

 s   14E) )F<FFFc	           	     H   t         j                  j                  }	|t        j                  v rt        j                  |   S t
        j                  j                  |k(  rat        j                  d||       t        | ||||||      }
| j                  |
      }t        j                  |      }|j                  ||       nQt        j                  d||       |j                  ||	      }t        j                   |      }| j#                  |||      }
|
t        j                  |<   |
S )Nz+Process %d compiling and sharing module: %sz&Waiting for module: %s from process %d)r
   r   r.   r   modules_cacher   r   r   r8   r^   r   serialize_executabler	   compress_executabler   r   decompress_executabledeserialize_executable)r   r   r   r   r   r   r:   r   first_process_idr   
executableserialized_executables               r    r   r     s,    >>DD-+999$229==((,<<
LL>!;0)J $88D-AA %%i1FG
LL9;!#)FF= .CC //1?DJ 8B)))4	r"   c                    t        j                         }t        | ||||      }t        j                         |z
  }	t        ||	|| ||       |S rD   )r   r   r   _cache_write)
r   r   r   r   r   r:   r   
start_timer  compile_times
             r    r   r     sV     ~~*'{.* !J.,{GZ 
r"   c           	         	 t        j                  | |      S # t        $ rV}t        j                  j
                  r t        j                  d| dt        |      j                   d|        Y d}~yd}~ww xY w)z<Checks if executable is presented in cache on a given key
  6Error reading persistent compilation cache entry for '': : NF)
r	   is_executable_in_cache	Exceptionr
   raise_persistent_cache_errorsr.   r   r   type__name__)r   r   r   s      r    r   r     ss    33GYGG	 ++11MM;c$r(++,Brd	45 s    	A7AA22A7c           	         	 t        j                  ||||      S # t        $ rV}t        j                  j
                  r t        j                  d|  dt        |      j                   d|        Y d}~yd}~ww xY w)ziLooks up the `computation` and it's compilation time in the persistent
  compilation cache repository.
  r	  r
  r  N)NN)
r	   get_executable_and_timer  r
   r  r.   r   r   r  r  )r:   r   r   r   r   r   s         r    r   r     s    	44?G-?A A	 ++11MM=DH--.b	67 s    	A9AA44A9c           	        t         j                  j                  r$t        j                         rt
        j                  nt
        j                  }t        j                  j                  dk7  rt        j                  |d       y|rt        j                  |d|       yt         j                  j                  }||k  rt        j                  |d|||       yt        j                  d|||       	 t        j                  | |||t!        |             y# t"        $ rV}t         j$                  j                  r t'        j(                  d| dt+        |      j,                   d	|        Y d}~yd}~ww xY w)
zqWrites the `serialized_computation` and its compilation time to the
  persistent compilation cache repository.
  r   z8Not writing persistent cache entry since process_id != 0NztNot writing persistent cache entry for '%s' because it uses host callbacks (e.g. from jax.debug.print or breakpoint)z]Not writing persistent cache entry for '%s' because it took < %.2f seconds to compile (%.2fs)z2'%s' took at least %.2f seconds to compile (%.2fs)z6Error writing persistent compilation cache entry for 'r
  r  )r
   r>   r.   r	   r?   r5   r6   r7   r   r   r   r8   r9   &persistent_cache_min_compile_time_secsr^   put_executable_and_timerh   r  r  r   r   r  r  )	r   compile_time_secsr:   r   r  r   log_prioritymin_compile_timer   s	            r    r  r    sT    0066'CCE // }}  ((A-
JJ|IK

JJ	>?JL BBHH))
JJ	%&13C	
 
LL<%'8:7--;
GS9J5KM	 7++11MM=DH--.b	67 77s   /"D 	E1AE,,E1)r   	xc.Clientreturnrh   )r0   	ir.Moduler  bool)r:   r   r   r   r  None)	NTFNNNNTN)rU   rh   rV   rh   rX   r  rY   r  r\   list[int] | Noner]   r  r   zdict[str, str] | Noner[   bytes | Noner   r  r   zxc.Client | Noner  xc.CompileOptions)
r   r  r0   r  r   xc.DeviceListr   r   r  zxc.Executable)r   r  r0   r  r   r!  r   r   r   Sequence[Any]r  xc.LoadedExecutable)r   z2Callable[[_jax.JaxRuntimeError], Exception | None]rD   )r   r  r   r  r   
np.ndarrayr   r   r   r"  r   r!  r   profiler.PGLEProfiler | Noner  r#  )r   r  r   r$  r   r   r   r  r   r%  r   r  r:   r   r   rh   r  z$tuple[str | None, xc.CompileOptions])r   r   r   r  r   r  r   r$  r   r  r  z
str | None)r   r  r   r$  r   r   r   r  r   !lib._jax.DistributedRuntimeClientr  bytes)r   r  r   r  r   r!  r   r   r   r"  r   r&  r:   r   r   r   r  rh   r  r#  )r   r  r   r  r   r!  r   r   r   r"  r:   r   r   r   r  r#  )r  r  )r:   r   r   r   r   r   r   r  r   r!  r  z-tuple[xc.LoadedExecutable | None, int | None])r   r   r  floatr:   r   r   r  r  r#  r   r"  r  r  )G
__future__r   collections.abcr   r   	functoolsr   r5   r   typingr   r   r   jax._srcr   r   r	   r
   r   r   r   r   r   r   r   r   jax._src.interpretersr   jax._src.libr   rS   r   jax._src.lib.mlirr   numpyr_   	bool_flagbool_envry   int_flagint_envr-   r   register_exclusion__file__rT   	getLoggerr  r8   r!   r&   r2   r<   rA   r   annotate_functionr   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r"   r    <module>r:     s  " # $      $  0 & %     $  #  & )     /f..$FOO4e<MN  &5V__+FNN:B?	A	& "   ! ! !( +""			8	$

7%% "&',:>8<37 $! $fff  	f
 !%f (8f &6f 1f f f f fR 
 & 	
  4 
;;; &; 	;
 "; ; ;| ! 1B12 37ZZZ Z '	Z
 "Z &Z 0Z ZfM1M1M1 'M1 	M1
 0M1 M1 M1 M1 *M1h *.  	
 '
 3=<888 '8 	8
 58 8v (*  $
... &. '	.
 ". 5. . . . .b +-  ' & '	
 "   (!$7H,9 3&/7$)/7!/7 $/7 2E/7 "/	/7 48	/7r"   