
    uki)                     B   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 ddl
mZ dej                  fd	Z G d
 de j                        Z	 	 	 ddej                  dedeeef   dedeeef   dz  dedefdZdefdZ	 	 ddededeej*                  z  dz  dedef
dZy)    N)mosaic_gpu_dialect)ir)arith)llvm   )utilsrefc                    t        j                  | j                        }|j                  dk7  rt	        d|       |j
                  d   |j
                  d   z  |j
                  d   |j
                  d   z  f}||j                  fS )zCReturns the 2D untiled shape and element type of a tiled 4D memref.   zExpected a 4D memref, got: r      r      )r   
MemRefTypetyperank
ValueErrorshapeelement_type)r	   ref_tylogical_shapes      `/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/experimental/mosaic/gpu/mma_utils.pytiled_memref_shaper      s|    =="&[[A
26(;
<<ll1oQ'a6<<?)J- 
++	++    c                   P    e Zd Z ej                         Z ej                         Zy)DimN)__name__
__module____qualname__enumautoKMN r   r   r   r   &   s    diik!tyy{"r   r   swizzle
group_sizelogical_k_major
large_tilemma_bytewidth_ksplit_constc                 J  ( t        j                  | j                        }t        j                  |j
                        (d|z  (z  }|j                         \  }	}
dt        f(fd}d x}}|r%|j                  \  }
}}}|	\  }}}}|\  }}|*|\  }}n$|j                  \  }}
}}|	\  }}}}|\  }}||\  }}d}d}d|z  (z  }d}|||k(  r||k(  s
|dk(  r||k  rt        |      ||hk7  r|dk(  r|dk(  rr||k(  rmt        j                  }|}||k(  sJ |dkD  rE||kD  r@|t        j                  |      k7  r(t        d| d	| d
t        j                  |       d      ||z  }|f}nJ||k(  rM|dk(  rH||k7  rt        d      t        j                  } ||      }||k(  sJ ||z  }|d|z  (z  k(  sJ ||z  f}nt        d      ||kD  r||} }n||}} ||k7  s| |k7  rt        d| d| d( d| d| d	|  d      |dk(  re|(z  ||z  k(  rZt        j                  }|} ||      }||k(  r|f}n||z  dk(  r||z  f}||z  |f}nn||k  rt        d      t!        d|d|      |(z  ||z  k(  r7|dk(  r2t        j                  } ||      } ||      }||z  }! ||      |!z  f}nt        d      t#        | ||||      }"t%        ||      \  }#}$|$rt        d| d|        ||      |#z  }%t%        ||      \  }&}$|$rt        d| d|        ||      |&z  }'|"||ff|%|'f|fS )N   stridec                 Z    dk\  rdz  dk(  sJ | z  dz  S dz  }| |z  dk(  sJ | |z  S )Nr*   r   r"   )r+   packingelement_bitwidths     r   to_byte_stridez)create_descriptor.<locals>.to_byte_stride9   sX    1!Q&&&&&!++%%gg"""wr   r   r"   r       zMMA layout with large tiles that is K-fastest only supports multiple MN tiles when the tiled MN dimension is a contiguous stack of tiles (z, z != )zZMMA layout with large tiles that is MN-fastest is only supported when the tiling is squarezMMA tiles must be contiguouszTiling should be (zM, swizzle_elems) where swizzle_elems = 8 * swizzle // bitwidth(dtype) (= 8 * z // z = z), but got (z[K dimension tiling is smaller than the width of a single MMA instruction. Increase swizzle.zk_group_size=z must be larger than k_tiling=)leading_byte_offsetstride_byte_offsetr#   r(   z1The M or N MMA instruction size was chosen to be zI, which is not a multiple of the tiling of the non-contracting dimension z,The K MMA instruction size was chosen to be zE, which is not a multiple of the tiling of the contracting dimension )r   r   r   r   bitwidthr   get_strides_and_offsetintr   setr   r    mathprodr   r!   NotImplementedErrorencode_descriptordivmod))r	   r#   r$   r%   r&   r'   r(   r   swizzle_elemsref_strides_r/   mn_large_tilek_large_tilemn_tilesk_tiling	mn_tilingk_tile_stridemn_tile_stridek_tiling_stridemn_tiling_stridek_group_sizemn_group_sizeIGNOREDMMA_ATOM_ROWSmma_width_kdesc_k_tilingfastest_dimr2   r3   desc_k_stridesslower_tilingfaster_tilingk_tiles_per_mma	desc_basemn_tiles_per_groupremmn_group_stridek_tiles_per_groupk_group_strider.   s)                                           @r   create_descriptorrZ   +   s    =="&^^F$7$78g+!11-002.+qS  "&%-,'-||$Ax9 EM>?4D #-L-$.!lM'-||$HaH ENM#3_ #-M<$.!m\'-O#'77+#%- 
(
"I%Q9};T j/m];
;
R
! 0H <EEk#%%% Q,i'		* 55 zN+4		*0E/FaI
 	
 )72')n	H	$)9Q)>		&)
 	
 FFk*>:\)))(72a'k-=====#g-/n566))%-y]m%-y]m%-)G}o .DDK9 Ms=/m_ M_A  ! 03C C}W^G^ ^EEk#).9	\	!)+("a'![02''1?Ck!0  "]\O3R"TUU	+	+}w/F	FK[_`K`FFk*>:)-8#}4o&}5GIn566	-+) #=)<c

;M? K[	" 
 #>25GG/!,9S

6|n EFFNZ	Q  "-03DD. =.12'
 r   xc                 B    | dz  dz	  }|dz  | k7  rt        d|        |S )N r   z*Cannot encode value in an MMA descriptor: )r   )r[   results     r   encode_addrr_      s4    KA&q[A
A!E
FF	-r   r2   r3   
const_initc           	         t         j                  j                  d      }t         j                  j                  d      t        | j                  t         j
                        rt        j                  | d      }n| }|j                  t         j                  j                  d      k(  sJ |j                         t        j                  |      }fd}	||t        j                  j                  k(  rd}
nk|t        j                  j                  k(  rd}
nK|t        j                  j                   k(  rd}
n+|t        j                  j"                  k(  rd}
nt%        |      t        j&                  t        j(                  | |	d	             |	d
            }|t+        |      dz  z  t+        |      dz  z  |
dz  z  }|rt-        j.                  ||      |fS |dz	  rE |	|dz        }t        j0                  |t-        j2                   |	|dz	         |	d                  }n |	|      }t        j0                  ||      S )Nr0   @   r   z!llvm.ptr<3>c                 0    t        j                  |       S )N)r   constant)r[   i64s    r   <lambda>z#encode_descriptor.<locals>.<lambda>   s    sA& r   r   r   r   r]   r      >   ?   l    )r   IntegerTypeget_signless
isinstancer   r   r   
memref_ptrTypeparser   ptrtointmgpu_dialectSwizzlingMode
kNoSwizzlek128ByteSwizzlek64ByteSwizzlek32ByteSwizzler:   lshrand_r_   r   truncior_shli)ref_argr2   r3   r#   r`   r(   i32ptrptr_valcswizzle_encodingencoded_base_addr
desc_constdesc_valre   s                 @r   r;   r;      s    	##B'#
##B'#bmm,


7A
&C
C	RWW]]>2	2<CHH<	2MM#s#'&!_<#=#=#H#HH,,,<<<,,,;;;,,,;;;
g
&&ii		'1W: >!E 	(	)R	/1	'	(B	.0 
R	!  <<./;;
 R:
*+h(EJJqr1A/BAbE$JKh:h88%x00r   )Nr0   F)r   F)r   r8   jax._src.libr   rq   jaxlib.mlirr   jaxlib.mlir.dialectsr   r    r   Valuer   Enumr   r6   tupleboolrZ   r_   rr   r;   r"   r   r   <module>r      s	      ;  & % ,BHH ,$))  *.g	gg c3hg 	g c3h$&g g gT3  /1/1 /1 <---4	/1
 /1 /1r   