
    uki2                        d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d Zd	 Z ej                  d
      Zde_        ej#                  e       ej%                  e       d Zd Z ej*                  eed       d Zy)    N)api)core)	tree_util)mlir)ir)hloc                 4    ~ t        j                  |       | S N)jax_corejaxpr_as_funjaxprargsunused_kwargss      P/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/_src/cudnn/fusion.py_cudnn_fusion_implr      s    	%		u	%t	,,    c                     ~~| j                   S r
   )	out_avalsr   s      r   _custom_abstract_evalr      s    
	r   cudnn_fusionTc                 $    t        j                  t        j                  | fi |d      | \  }}t	        j
                  |      }t	        j                  |      }t        j                  || j                  |d}t	        j                  ||      S )z`Creates a new cudnn_fusion corresponding to calling
  the given function f with args and kwargs.T)return_shape)namer   )r   
make_jaxpr	functoolspartialr   tree_leavestree_structurecudnn_fusion_pbind__name__tree_unflatten)fr   kwargsr   
out_shapes	flat_argsout_treeout_flats           r   call_cudnn_fusionr*   *   s    cnna"6"	% ##D))%%j1(  )!**EJ(		!	!(H	55r   c                `   t        j                  | g||dz   |d}|d   j                  }|j                  d   }t	        j
                  |j                  D cg c]  }|j                   c}|j                  dt        j                  j                  |g            }|j                  S c c}w )zMake cudnn_fusion which calls the implementation function.
  Currently this leaks a CallOp since we're using the `core_call_lowering`
  function, but this should get cleaned up by DCE easily.
  z.impl)r   
call_jaxprr   calleez__cudnn$fusion)call_target_namecalled_computations)r   core_call_loweringowner
attributesr   CustomCallOpresultstypeoperandsr   	ArrayAttrget)	ctxr   r   r   implcall_op	called_fnrr   s	            r    _cudnn_fusion_stablehlo_loweringr>   6   s    
 
	 	 


G^
$ GMM'  *)!!__%QVV%%(()5	, 
		 &s   B+cuda)platformc                 6    t        j                  t        |       S )a>  Makes a function become a cuDNN kernel. Relies on XLA's handling of
  custom fusions with __cudnn$fusion backend. Currently limited to GEMM
  fusions. For example - batch matmul with mixed types and addition:

  @cudnn_fusion
  def fn(x, y, z):
      return jnp.float32(jax.lax.batch_matmul(jnp.bfloat16(x), y)) + z
  )r   r   r*   )r$   s    r   r   r   N   s     
		,a	00r   )r   jax._srcr   r   r   r   jax._src.interpretersr   jax._src.lib.mlirr   jax._src.lib.mlir.dialectsr   r   r   	Primitiver    multiple_resultsdef_abstract_evaldef_implr*   r>   register_loweringr    r   r   <module>rL      s      %  &   *-
 $##N3"&      !6 7   * +	6&   4v
	1r   