
    uki
                        d Z ddlZddlZddlmZ ddlmZ ddl	m
Z d Z ej                  ej                  g d      ddd	d
dej                  dej                  dedej"                  dz  dedej                  fd       Zy)zeExample matmul TPU kernel.

See discussion in https://docs.jax.dev/en/latest/pallas/tpu/matmul.html.
    N)pallas)tpuc                    t        j                  t        j                  d      dk(        fd       }d   t        j                  | d   |d   j
                        z   d<   d   j                  |j
                        |d<   y )N   r   c                  6    t        j                          d<   y )N.)jnp
zeros_like)acc_refs   a/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/experimental/pallas/ops/tpu/matmul.pyinitzmatmul_kernel.<locals>.init   s    >>'*GCL    .)preferred_element_type)plwhen
program_idr   dotdtypeastype)
x_tile_ref
y_tile_ref
o_tile_refr
   r   s      ` r   matmul_kernelr      s    772==q !+ "+ oo$]]! '#, CL''
(8(89*S/r   )block_shapeblock_kdebug	out_dtype)static_argnames   F)r   r   r   xyr   r   r   returnc                   |J| j                   |j                   k7  r%t        d| j                    d|j                          | j                   }t        j                  }| j                   t        j                  t        j
                  t        j                  t        j                  fv rt        j                  }|\  }} t        j                  t        t        j                  | j                  d   |j                  d   f|      t        j                   dt        j"                  ||fd       t        j"                  ||fd       gt        j"                  ||fd       | j                  d   |z  |j                  d   |z  | j                  d   |z  ft        j$                  ||f|      g      t        j&                  d	
      |      | |      S )Nz7Cannot deduce output dtype for different input dtypes: z, r      c                 
    | |fS N )i_ks      r   <lambda>zmatmul.<locals>.<lambda>J   
    !Q r   c                 
    ||fS r%   r&   )r(   jr)   s      r   r*   zmatmul.<locals>.<lambda>K   r+   r   c                 
    | |fS r%   r&   )r'   r-   r)   s      r   r*   zmatmul.<locals>.<lambda>M   r+   r   )num_scalar_prefetchin_specs	out_specsgridscratch_shapes)parallelr4   	arbitrary)dimension_semantics)	out_shape	grid_speccompiler_paramsr   )r   	TypeErrorr   float32int8int4uint8uint4int32r   pallas_callr   jaxShapeDtypeStructshapepltpuPrefetchScalarGridSpec	BlockSpecVMEMCompilerParams)	r   r    r   r   r   r   	acc_dtypelrs	            r   matmulrM   +   s    ww!''CAGG9 MggY  Ikk)WW388SYY		::		I	$!Q
$$aggaj!''!*%=yI,,llAw<)?@llGQ<)?@ LL!Q)?@
aq!''!*2GH**aVY78	 **CE
  q!

 
r   )__doc__	functoolsrB   jax.experimentalr   r   jax.experimental.pallasr   rE   	jax.numpynumpyr   r   partialjitArrayintr   boolrM   r&   r   r   <module>rY      s   
  
 ) 0 : GGM "&&

yy&

yy&

 &
 yy4&
 &
 	YY&
&
r   