
    uki                      U d Z ddlmZ ddlmZmZmZ ddlZddlZddl	Z	ddl
Z
ddlZddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lm
Z ddlmZ ddlmZ ddlmZ ddlZddlmZ  ddlm!Z" ddlm#Z# ddlm$Z% ddlm&Z' ddlm(Z( ddlm)Z) dZ* G d dejV                        Z, G d dejZ                        Z. ej^                  d      Z0 ejb                  d       G d d             Z2	 	 	 	 	 	 	 	 dkd Z3	 	 	 	 dld!Z4	 	 	 	 dmd"Z5	 	 	 	 	 	 dnd#Z6	 	 	 	 	 	 dod$Z7	 	 	 	 dpd%Z8	 	 	 	 dqd&Z9	 	 	 	 	 	 drd'Z:	 	 	 	 	 	 	 	 dsd(Z; ejb                          G d) d*             Z<e=e j|                  e?e2   f   Z@ee<ej                  geBe j                  e@f   f   ZDi ZEd+eFd,<   dtd-ZGdud.ZHdud/ZIdud0ZJ	 	 	 	 	 	 dvd1ZKg ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  D ]  Zt  eGet      eK         eGej                        	 	 	 	 	 	 dwd2       Zv eGej                        	 	 	 	 	 	 dxd3       Zx eGej                        	 	 	 	 	 	 dyd4       Zz eGej                        	 	 	 	 	 	 dzd5       Z| eGej                        	 	 	 	 	 	 d{d6       Z~ eGej                        	 	 	 	 	 	 dvd7       Z eGej                        	 	 	 	 	 	 dvd8       Z eGej                        	 	 	 	 	 	 d|d9       Z	 	 	 	 	 	 d}d:Z eGej                        	 	 	 	 	 	 d~d;       Zdd<Zdd=Z eGej                        	 	 	 	 	 	 dd>       Z eGej                        	 	 	 	 	 	 dd?       Z eGej                        	 	 	 	 	 	 dd@       Z	 	 	 	 	 	 ddAZ	 	 	 	 	 	 ddBZ eGej$                        	 	 	 	 	 	 ddC       Z eGej                        	 	 	 	 	 	 ddD       Z eGej*                        	 	 	 	 	 	 ddE       Z	 	 	 	 	 	 	 	 ddFZ eGej0                        	 	 	 	 	 	 ddG       Z eGej4                        	 	 	 	 	 	 ddH       Z eGej8                        	 	 	 	 	 	 ddI       Z eGej<                        	 	 	 	 	 	 ddJ       Z eGej@                        	 	 	 	 	 	 ddK       Z	 	 	 	 ddLZ eGejF                        	 	 	 	 	 	 ddM       Z eGejJ                        	 	 	 	 	 	 ddN       Z eGejN                        	 	 	 	 	 	 ddO       Z eGejR                        	 	 	 	 	 	 ddP       Z eGejV                        	 	 	 	 	 	 ddQ       Z eGejZ                        	 	 	 	 	 	 ddR       Z eGej^                        	 	 	 	 	 	 ddS       Z eGejb                        	 	 	 	 	 	 ddT       Z eGejf                        	 	 	 	 	 	 ddU       Z eGejj                        	 	 	 	 	 	 ddV       Z eGejn                        	 	 	 	 	 	 ddW       Z eGejr                        	 	 	 	 	 	 ddX       Z eGejv                         eGejx                        	 	 	 	 	 	 ddY              Z	 	 	 	 	 	 ddZZ eGej~                        	 	 	 	 	 	 dd[       Z eGej                         eGej                        	 	 	 	 	 	 dd\              Zdd]Z	 	 	 	 	 	 	 	 	 	 dd^Z	 	 	 	 	 	 dd_Z ejb                  d       G d` da             ZddbZddcZdddZddeZ	 	 	 	 ddfZddgZ	 	 	 	 ddhZe*di	 	 	 ddjZy)zDLayout and transform inference pass for the MLIR Mosaic GPU dialect.    )annotations)CallableIteratorSequenceN)assert_nevercast)logging)mosaic_gpu_dialect)ir)arith)math)memref)scf)vector   constraints)fragmented_array)inference_utils)launch_context)layouts)tcgen05)utilsi c                      e Zd ZdZdZdZdZy)VariableTypez_The type of a variable.

  Variables are operands, results, or arguments of MLIR operations.
  r   r      N)__name__
__module____qualname____doc__OPERANDRESULTARGUMENT     g/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/experimental/mosaic/gpu/layout_inference.pyr   r   B   s     '&(r%   r   c                  v    e Zd ZdZ ej
                         Z ej
                         Z ej
                         Zy)MemorySpacezThe memory space of a variable.N)	r   r   r   r    enumautoREGSMEMTMEMr$   r%   r&   r(   r(   L   s,    '		#	$	$r%   r(   z^(%\d+ = )?\S+T)frozenc                  n    e Zd ZU dZded<   ded<   ded<   dZd	ed
<   d Zedd       Zedd       Z	d Z
y)	ValueSitezA unique identifier for a variable.

  This class describes a particular role of a Value, either as a result of an
  operation, an operand of an operation, or a block argument.
  	ir.OpView	operationr   typeintindexNz
int | Noneregion_indexc                `    | j                   t        j                  k7  | j                  d u k(  sJ y N)r3   r   r#   r6   selfs    r&   __post_init__zValueSite.__post_init__h   s*    II...D4E4E4MNNNr%   c                   | j                   t        j                  k(  r#| j                  j                  | j
                     S | j                   t        j                  k(  r#| j                  j                  | j
                     S | j                  j                  | j                     j                  d   j                  | j
                     S )z6Returns the IR value corresponding to this value site.r   )r3   r   r!   r2   operandsr5   r"   resultsregionsr6   blocks	argumentsr9   s    r&   valuezValueSite.valuek   s     yyL(((^^$$TZZ00	l))	)^^##DJJ//^^##D$5$56==a@JJ4::VVr%   c                ~   | j                   j                  }t        j                  j	                  |      rt
        j                  S t        j                  j	                  |      sJ t        j                  |      rt
        j                  S t        j                  |      rt
        j                  S t        d|       )z4Returns the memory space associated with this value.zUnsupported memory space for: )rB   r3   r   
VectorType
isinstancer(   r+   
MemRefTyper   is_tmem_refr-   is_smem_refr,   
ValueError)r:   r3   s     r&   memory_spacezValueSite.memory_spaceu   s     ::??D	}}%__==##D)))			4	 
5dV<
==r%   c                   t         j                  t        | j                              }|J | j                  t
        j                  k(  r |j                  d       d| j                   S | j                  t
        j                  k(  r |j                  d       d| j                   S |j                  d       d| j                   S )Nr   z:o-z:r-z:a-)
_op_name_regexmatchstrr2   r3   r   r!   groupr5   r"   )r:   rM   s     r&   __str__zValueSite.__str__   s      T^^!45EyyL(((As4::,//	l))	)As4::,//As4::,//r%   )returnir.Value)rQ   r(   )r   r   r   r    __annotations__r6   r;   propertyrB   rJ   rP   r$   r%   r&   r0   r0   U   s[      
*!,
!O W W 
> 
>0r%   r0   c              #    K   |j                   j                  j                  j                  }t        j
                  t        j                  t        j                  t        j                  t        j                  g}|d   dz  dk(  r'|j                  t        j                  |d                |D ]Z  }t        |j                        t        |      kD  r%|j                  |      | j                  k(  sDt!        j"                  |       \ yw)zXYields layout candidates for the reduce equation `small = reduce(large, reduction_dims).   r   N)keyrB   r3   shapefaWGMMA_LAYOUTWGMMA_TRANSPOSED_LAYOUTTCGEN05_LAYOUTTCGEN05_TRANSPOSED_LAYOUTr   TMEM_NATIVE_LAYOUTappendfa_m64_collective_layoutlenbase_tile_shapereducecsRegisterLayout)smalllargereduction_dimslarge_shape
candidates	candidates         r&   2extract_assignment_candidates_from_reduce_equationrm      s      		$$**+oo  ""  * _rQg66{2GH )i
9$$%K(88'5;;6i((	)s   C/D2Dc                    | j                   j                  j                  }t        j                  j                  |      sJ t        j                  j                  |      S )zzReturns a strided layout for the given variable.

  If the given variable cannot have a strided layout, returns `None`.
  )	rX   rB   r3   r   rD   rE   rZ   WGStridedFragLayoutfrom_shaped_type)variabler3   s     r&   _strided_layout_for_variablerr      sH     
			 	 $		!	!$	''	'				0	0	66r%   c                   | j                   j                  }|j                  j                  }t	        |t
        j                        r?t        j                  t        |j                  j                        |j                  d      S y)zHReturns a default TMEM layout for the given variable, if one is defined.r   packingN)rX   rB   owneropviewrE   mgpuTmemAllocOpr   _infer_tmem_layouttupler3   rY   
collective)rq   rB   parents      r&   !_default_tmem_layout_for_variabler~      sf     ,,

%;;&(()%%ejj!2!2A  
r%   c              #    K   t        | j                  t        j                        sy |t	        | j
                        kD  ry | j
                  | d  }| j                  t        j                  t        j                  |            f y wr8   )	rE   exprre   Variablerb   tiling_multiple
SMEMTilinglcTileTransform)divide_constraintnum_tiled_dimstilings      r&   _extract_tiling_candidater      sq      
%**BKK	8
c+;;<<
,,n_-=>&b.>.>v.F GGGs   BBc              #    K   | j                   | j                  }}||fx  r8 dk(  r3\  t        j                  d x\   t        j                  d x	\    ||}}n>    r7 dk(  r2\  t        j                  d x\   t        j                  d x\   ||}}n  	 yt        |t        j                        sJ t        |t        j                        r|j                  }|j                  j                  t        j                  k(  rt        j                  |j                  j                  j                        j                  }ddt!        j"                  |      z  fD ]:  }| j%                  |      D ]$  \  }	}
||
k(  s|t        j&                  |	      f & < n|j                  j                  t        j(                  k(  rt+        j,                  |      rt/        |j                  j                  j                  t0        j2                  j4                        }t        j6                  ||      }|j9                  |      x}3t        j:                  ||g      \  }t=        t        j6                  |      }t?        |tA        |            E d{    nyt        |t        j&                        rQ|j                  }|jB                  }| j%                  |      D ]$  \  }	}
||	k(  s|t        j                  |
      f & yy7 rw)z=Attempts to extract variable assignments from a `Constraint`.r   r$   Nr       max_swizzle)"sourcetargetre   r   ConstantrE   rf   rB   rX   rJ   r(   r-   r   rF   r3   element_typer   bitwidthsupported_tmem_transfers
TMEMLayoutr,   r   is_mma_layout_infer_tiling_for_mma_refrx   SwizzlingModek128ByteSwizzleDividesgetmerge_divides_constraintsr   r   rb   vector_length)
constraintdivision_constraint_per_varsrctgtrq   constantlayoutdtyperu   tmem_layout
reg_layoutr   dividedivide2mergeds                  r&   5_extract_layout_candidates_from_memory_space_transferr      sh     
 1 1s#S	%	%	%h 
&	%	%h 
&	
	Hbkk	**	*"++,^^F||  K$4$44mmHLL..334AAeu~~e445 7''1'J'J(
 	7#K z!BMM+666		77 
	"	"k&6&6	6		&	&v	.*LL##**::
 Hf-266x@@GM 11672CD(6

F+&,VS[AAA 	"--(^^F""G#-#F#Fw#O 6Z	;	))*5556 ) 	Bs    E4K=7DK=K;	AK=K=c                    i }| D ]a  }t        |t        j                        st        |j                  t        j                        sC|j                  |vsJ |||j                  <   c |S r8   )rE   re   r   r   r   )r   resultr   s      r&   _divides_per_varr     sc     +-& +j*bjj)j/ __F*** *fZ__+ 
-r%   c              #    K   t        |       }| D ]  }|xt        j                  d x\    t        ||      E d{    1 xt        j                  dxe\  t        j
                  dxO\  xt        j                  d x7\   ccxt        j                  d x\   }}} t        |||      D ]  }||f 
     xt        j                  dxe\  xt        j                  d xM\   ct        j
                  dx8\  xt        j                  d x \   }}} t        |||      D ]  }||f 
 "    xt        j                  dx<\  xt        j                  d x$\   cxt        j                  d x\   }} ||f t   t        j                  dx;\  xt        j                  d x#\   cxt        j                  d x\   }}||f     y7 w)z>Attempts to extract variable assignments from all constraints.r$   N)axes)
r   re   IsTransferabler   EqualsReducer   rf   rm   Relayout)r   dpvcrh   r   rg   r   vars           r&   ._extract_variable_assignments_from_constraintsr     s     	%# a
2HCPPP `299`RYYA5r{{}DC_2CTCTCV`HPUW[\ 	Fv
	 a a299`1R&&(12993_=SR[[]`HPUW[\ 	Fv
	 a L2;;K+r{{}+-JR->->-@K6k L;;K4r((*46JbkkmK6k L 	Qs   9GGFGc              #    K   g }t        |j                        D ]R  \  }}|t        j                  d x1\  }t	        |t
        j                        s|j                  ||f       K 	 ||f T |D ]  \  }}||f  | D ]  }||j                  v r|j                  j                  xt        j                  k(  r) t        |      }|I|t        j                  |      f cxt        j                  k(  r |t        j                  d      f t        j                   k(  r(t#        |      }||t        j$                  |      f 	 t'        d|j                  j                          yw)z:Attempts to conjure an assignment for an unknown variable.)rB   NzUnsupported memory space: )r   r   re   rf   rE   rZ   TiledLayoutr`   assignmentsrX   rJ   r(   r+   rr   r,   r   r-   r~   r   rI   )unknownsconstraint_systemlow_priority_assignmentsrq   r   rB   r   s          r&   conjure_assignmentr   &  sg     EGJ## !h )E2>>1R ''8(<= *  ! 5 h
H
  Sh$000 ,,
#
#;??-h7"++F33
3;d+++28<"--//
/5hll6O6O5PQRR#Ss   CE1
A'E12?E1c                  t        j                  |      }t        |t         j                        rt        j                         |fS | D cg c]!  }||j                  j                         vs |# }}|sH|j                  rJ d       |j                  j                         D ci c]  \  }}|| v s|| c}}|fS t        ||      D ]  }|dk  rt        d      |dz  }|\  }}	t        j                  ||	i      |z  }
t        |
t         j                        rSt        | |
|      \  }}t        |t         j                        r||fc S  t        j                         |fS c c}w c c}}w )a  Attempts to find assignments that satisfy `constraint_system` for `unknowns`.

  Args:
    unknowns: the set of variables that are unknown. Represented as a sequence
      of `Variable`s for determinism purposes.
    constraint_system: the constraint system to satisfy.
    fuel: the fuel to use for the search. Once the fuel is exhausted, we raise
      an error.

  Returns:
    A tuple where the first element is the solution, and the second element is
    the fuel remaining after the search. The solution is either:
      - Unsatisfiable() if the constraint system has unsatisfiable constraints.
      - A dictionary assigning all the unknown variables to
        `ConstantExpression`s such that the assignment satisfies the constraint
        system otherwise.
  zVA satisfiable system should not have remaining unsatisfied constraints. This is a bug.r   zsLayout inference failed to find a solution. Consider adding layout annotations to your program to guide the search.r   r   fuel)re   rd   rE   Unsatisfiabler   keysr   itemsr   rI   ConstraintSystemfind_assignments_for)r   r   r   uremaining_unknownsvk
assignmentrq   r   new_constraint_systemsolutions               r&   r   r   Z  s   . ii 12!2#3#34t## Q&7&C&C&H&H&JJa  
 ,, 	',
 +66<<>A!x-1  '+ j qy= 
 	AIDNHd
4(89<MM  ')9)9:)'dNHd h 0 01t^+2 
			T	!!Ys   
!E$,E$$E)1E)c                      e Zd ZU dZ ej
                  ed      Zded<    ej
                  ed      Z	ded<   ddZ
dd	Zy
)DerivationContextzAHolds context information used for deriving an constraint system.F)default_factoryinitzdict[ValueSite, cs.Variable]variable_for_value_siteValueSitesForVariablevalue_sites_for_variablec                    |j                         D ]g  \  }}|| j                  v r| j                  |   j                  |       n|| j                  |<   |D ]!  }|| j                  vsJ || j                  |<   # i y r8   )r   r   extendr   )r:   mappingrq   value_sites
value_sites        r&   updatezDerivationContext.update  s    !( <+	T22	2%%h/66{C2=%%h/# <*!=!====3;$$Z0<<r%   c                2    | j                   t        |         S )z>Returns the producer reference variable for the given operand.)r   producer_result)r:   operands     r&   producer_refzDerivationContext.producer_ref  s    ''(@AAr%   N)r   r   rQ   None)r   r0   rQ   cs.Variable)r   r   r   r    dataclassesfielddictr   rS   r   r   r   r$   r%   r&   r   r     sQ    I:K+:K:K;7 
 k59 1 <Br%   r   z)dict[str, ConstraintSystemDerivationRule]#_constraint_system_derivation_rulesc                     d fd}|S )Nc                2    | t         j                  <   | S r8   )r   OPERATION_NAME)ruleops    r&   wrapperz7_add_constraint_system_derivation_rule.<locals>.wrapper  s    	~?C)"*;*;<Kr%   )r   ConstraintSystemDerivationRuler$   )r   r   s   ` r&   &_add_constraint_system_derivation_ruler     s    
 
.r%   c                T    t         j                  j                  | j                        S r8   )r   rD   rE   r3   r   s    r&   	is_vectorr     s    		!	!!&&	))r%   c                    t         j                  j                  | j                        xr t	        j
                  |       S r8   )r   rF   rE   r3   r   rH   r   s    r&   _is_smem_refr     +    		!	!!&&	)	Be.?.?.BBr%   c                    t         j                  j                  | j                        xr t	        j
                  |       S r8   )r   rF   rE   r3   r   rG   r   s    r&   _is_tmem_refr     r   r%   c                z    ~ t        |      }t        j                  |d         }t        j                         ||ifS )NrV   )vector_value_sitesre   r   r   )ctxr   all_value_sitesrq   s       r&   _pointwise_op_constraint_systemr     s=     
&r*/[[,-(				? ;	;;r%   c                P   t        |t        j                  d      }t        j                  |      }||gi}t        j
                  |t        j                        g}t        j                  |j                        rt        |t        j                  d      }| j                  |      }|g||<   t        t        j                  |j                  j                         j"                        }|j%                  t        j&                  |||             t        j(                  |      }	|	|fS Nr   r   )r0   r   r"   re   r   	NotOfTyperZ   WGSplatFragLayoutr   rH   r   r!   r   r{   r   rF   r3   rY   r`   r   r   )
r   r   destdest_varr   r   r   
source_varrY   systems
             r&   _vector_load_constraint_systemr  #  s     
2|**A	.$[[(&/h(<(<=>+ ryy!r<//3F!!&)J,28Z("--		/556Er((XuEF;7&	)	))r%   c                
   t        |t        j                  d      }t        j                  |      }||gi}g }t        j                  |j                        rt        |t        j                  d      }| j                  |      }|g||<   t        t        j                  |j                  j                        j                        }|j                  t        j                  |||             t        j                   |      }	|	|fS Nr   r   r   )r0   r   r!   re   r   r   rH   destinationr   r{   r   rF   r3   rY   r`   r   r   )
r   r   rB   	value_varr   r   r   r   rY   r  s
             r&   _vector_store_constraint_systemr  ?  s     B,,a
0%kk% )'%1 +
r~~&R--q1D%H*.X&"-- 3 34::;Er((HeDE;7&	)	))r%   c                    ~ t        |t        j                  d      }t        j                         t        j
                  |      |gifS Nr   )r0   r   r!   re   r   r   )r   r   rB   s      r&   _debug_print_constraint_systemr
  [  s?    
 

B,,a
0%				U!3eW =	==r%   c                    t        |t        j                  d      }t        |j                        rt        j                  |      n| j                  |      }t        j                         ||gifS r	  )	r0   r   r!   r   rB   re   r   r   r   )r   r   rB   r   s       r&   _print_layout_constraint_systemr  e  sX    
 B,,a
0%'1Es7G7G7N#				ug	..r%   c                    ~ t        |t        j                  d      }t        j                  |      }t        j
                  |t        j                        g}t        j                  |      ||gifS r   )	r0   r   r"   re   r   r   rZ   r   r   )r   r   rB   r   r   s        r&   #_broadcasted_iota_constraint_systemr  o  s^    
 

B++Q
/%
E#c2#7#789+				5eW~	EEr%   c                T   ~ i }t        |j                        D ]x  \  }}t        |      st        j                  t        |t        j                  |            }t        |t        j                  |      t        |t        j                  |      g||<   z t        j                         |fS r8   )
	enumerater=   r   re   r   r0   r   r!   r"   r   )r   r   r   ir   rq   s         r&   '_optimization_barrier_constraint_systemr  {  s    
 
46bkk* jaW{{9R)=)=qABH"l**A."l))1-*X&	 
			 8	88r%   c                p   ~ t        |t        j                  d      }t        j                  |      }t        j                  t        t        t        j                  |j                  j                        j                              }t        j                  |t        j                  |      i      }|||gifS Nr   r   )r0   r   r"   re   r   rZ   r   r{   r   r   
ShapedTyper   r3   rY   r   rf   )r   r   r   rq   r   r  s         r&   _vector_splat_constraint_systemr    s    
 
R,,a0&[[ (d2=="))..&I&O&O PQ&R..v67& 
(VH%	%%r%   c                x   ~ |j                   }t        |t        j                  d      }t	        j
                  |      }t        t        j                  |j                  j                        j                        }t        j                  j                  |      rat        j                  |      j                  rBt        j                   |      }t	        j"                  |t	        j$                  |      i      }n;t	        j&                  |t        j                         }t	        j"                  |g      }|||gifS )Nr   )rY   r   r   )rB   r0   r   r"   re   r   r{   r   r  r   r3   rY   DenseElementsAttrrE   is_splatrZ   r   r   rf   r   )	r   constant_oprB   r   rq   rY   r   r  constant_is_not_splats	            r&   _constant_constraint_systemr    s    
 



%[,"5"5q9&[[ (
k00556<<
=%%%e,


u
%
.
.!!.F  r0089F LL23G3GH  .C-DEF	(VH%	%%r%   c                    | j                   t        | j                         dz
     }t        ||      sJ |j                  S )zdReturns the terminator of the given block.

  Checks that the terminator is of the expected type.
  r   )
operationsrb   rE   rw   )blockexpected_terminator
terminators      r&   _terminatorr"    sA     E$4$4 5 9:*	J 3	44	4			r%   c                l   |j                   j                  \  }t        |t        j                        }i }d}t        |j                        D ]  \  }}t        |      st        |      s||z
  }||z
  dz   }	t        |t        j                  |      }
t        |t        j                  |	d      }t        |t        j                  |      }t        |t        j                  |      }t        |      rt        j                  |
      n| j!                  |
      }|
|||g||<    t        j"                         |fS )N   r   r   r6   )regionr@   r"  r   YieldOpr  r=   r   r   r0   r   r!   r#   r"   re   r   r   r   )r   r   r  yield_opr   num_leading_argsr5   oresult_index	arg_indexr   argr   yield_operandr   s                  r&   _for_constraint_systemr/    s   
 II'5,(46 BKK( JheQQ<Q++L((1,IL00%8G
B--yq
ICr<..=F,&&M #,A,"++g
C4D4DW4MC%,c6=$IS!J 
			 8	88r%   c                    g }d}||z  | k  r5| |z  dk(  r| |z  } |j                  |       | |z  dk(  r|dz  }||z  | k  r5| dk7  r|j                  |        |S )zReturns the prime decomposition of the given number `n` as a list of ints.

  A factor appears as many times in the list as the power up to which it divides
  `n`.
  r   r   r   )r`   )nprime_factorsdivisors      r&   prime_decompositionr4    s     -''Q
g+
Gma7# g+
 qLG	 	'Q
 !V	r%   c                h   | dk  rt        d      t        j                  j                  |j                        sAt        j
                  j                  |j                        st        d|j                         t        |j                  t        j                        rbt        |j                  j                  t        j                        r4t        j                  | |j                  j                  j                        S d}t        |       D ]!  }t        j                   |||z        s||z  }# |S )Nr   za must be strictly positivez)Expected an integer dynamic value, got a r   )rI   r   IntegerTyperE   r3   	IndexTyperv   	Operationrw   r   
ConstantOpr   gcdliteral_valuer4  r   is_known_divisible)abrunning_gcdfactors       r&   dynamic_gcdrA    s    !V
2
33		"	"166	*2<<3J3J1663R
@I
JJ&:aggnneFVFV+W88Aqww~~3344+#A& f;#78Vk 
r%   c                   ~ |j                   j                  \  }|j                  j                  \  }t        |t        j
                        }t        |t        j                        }i }t        |      D ]  }|j                  }|j                  xt        j                  k(  rU t        |t        j                  |d      }	t        |t        j                  |      }
||	|
g|t        j                  |      <   xt        j                   k(  rX t        |t        j                  |dz         }t        |t        j                  |d      }	||	|g|t        j                  |      <   }t#        |        t        j$                         |fS )Nr   r%  r   )beforer@   afterr"  r   ConditionOpr'  r   r5   r3   r   r!   r0   r#   re   r   r"   r   r   )r   r   before_blockafter_blockcond_opr(  r   r   idxr-  r.  cond_operandnevers                r&   _while_constraint_systemrL    sE   
 
99##.<((//-;coo6'ckk2(46&r* j


C
//<L113QG!(L,@,@#F=
 Z!89
 < ,*>*>aHL113QG=
 Z!89
 U+. 
			 8	88r%   c                   ~ t        |      D ci c]  }t        j                  |      |g }}|j                  D ]  }|j                  \  }t        |t        j                        }|j                         D ]n  }|j                  j                  t        j                  k(  sJ t        |t        j                  |j                  j                        }||   j!                  |       p  t        j"                         |fS c c}w r8   )r   re   r   r?   r@   r"  r   r'  r   rX   r3   r   r"   r0   r!   r5   r`   r   )	r   r   r*  r   r&  r  r(  r   r.  s	            r&   _index_switch_constraint_systemrN  *  s    
 
#5b#95bkk!nqc5 5 

 AfmmGU5#++.H.335 A
^^  L$7$7777
L((*..*>*>m z*11-@AA 
			 8	885s   C?c                :   ~ t        |t        j                  d      }t        |t        j                  d      }t	        j
                  |      }t	        j                  t        j                  |j                              }t	        j                  ||i      |||gifS r  )r0   r   r!   r"   re   r   rf   layouts_libfrom_layout_attr
new_layoutr   )r   r   r   r   rq   
out_layouts         r&   _layout_cast_constraint_systemrT  @  s    
 
b,..2'R,,a0&[[!(  !=!=bmm!LM*x&<='6"#
 r%   c                $   t        j                  | j                        }| j                         \  }}t	        j
                  |      }| j                  |   }t        j                  j                  t        j                  j                  t        j                  j                  t        j                  j                  fD ]  }||kD  r	||z  }||z  dk(  s|}	 n t        | j                   d      d}
|t        |      dz
  k7  }|r|	|
f}|S |
|	f}|S )Nr   z is not a valid WGMMA shape   r   )r   	bytewidthr   get_strides_and_offsetnpargminrY   rx   r   r   k64ByteSwizzlek32ByteSwizzle
kNoSwizzlerI   rb   )ref_tyr   element_bytewidthstrides_min_dim_index	minor_dimswizzleswizzle_elemsminor_tilingmajor_tiling
transposedr   s                r&   r   r   P  s    oof&9&9:,,.*'1))G$-ll=)) ((
''
''
##	 Cg 00M= A%"lC ~%@A
BB,Gq 00*L)F 
- L)F	-r%   c                   t        |t        j                  j                        }t	        |t        j                  |            }t        j                  j                  |       sd|fS t        t        t        j                  |       |      }t	        | t        j                  |            }||k7  r3t        ||      }t	        |t        j                  |            }||k(  sJ ||fS )zInfers the tiling for a (if in SMEM) and b of a WGMMAOp.

  If both a and b are in SMEM, this function infers tilings that have matching
  swizzle values.
  r   N)r   rx   r   r   _compute_swizzler   r   r   rF   rE   r   )a_typeb_typeb_tiling	b_swizzlea_tiling	a_swizzles         r&   _infer_wgmma_tilingrq  r  s     '$,,<<( vr'7'7'AB)		!	!&	)>&
2==&!y( vr'7'7'AB)) )YGH )9)9()CDI	!!!	8	r%   c                   i }i }t        |t        j                  d      }t        |t        j                  d      }t	        j
                  |      }t	        j                  t        j                        ||<   ||g||<   t        |j                  j                  |j                  j                        \  }}t        |t        j                  d      }	| j                  |	      }
t	        j                  t        j                   |            ||
<   |	g||
<   t        |t        j                  d      }t#        |j                        r=| j                  |      }t	        j                  t        j                   |            ||<   n|J t	        j
                  |      }t$        j&                  j)                  d      t%        j*                  |j                  j                        j,                  k(  r't	        j                  t        j.                        ||<   n&t	        j                  t        j                        ||<   |g||<   t	        j0                  |      |fS )Nr   r   r   rV  )r0   r   r"   r!   re   r   rf   rZ   r[   rq  r=  r3   r>  r   r   r   r   r   r   r6  get_signlessrD   r   WGMMA_LAYOUT_8BITr   )r   r   r   r   acc_outacc_inacc_varro  rm  r>  b_varr=  a_vars                r&   _wgmma_constraint_systemrz    s   
 13+46b,--q1'R--q1&KK '**2??;+g'-w&77#*24499bddii@(HL((!,!


1
%}}R%5%5h%?@+e%&C5!L((!,!"$$QEr'7'7'ABKKKNE	~~""1%rttyy)A)N)NN,,R-A-ABk%,,R__=k%%&C5!			[	)+C	CCr%   c                   ~ t         j                  j                  |j                  j                        rt        d      t        j                  t        |t        j                  d            }t        j                  t        j                  t        |j                  j                  j                                     }t        j"                  ||i      ||j$                  gifS )Nz2Only vector broadcasts from scalars are supported.r   r   )r   r  rE   r   r3   NotImplementedErrorre   r   r0   r   r"   rf   rZ   r   r{   r   rY   r   rX   )r   r   out_variabler   s       r&   #_vector_broadcast_constraint_systemr~    s    
 
 ]]biinn-
R
SSYr<+>+>BC,R11%		8L8L2MNO&|V&<=l&&'(
 r%   c                    ~ t        j                  t        |t        j                  d            }t        j
                         ||j                  gifS r	  )re   r   r0   r   r!   r   rX   )r   r   in_variables      r&   #_vector_reduction_constraint_systemr    sD    
 
Ib,*>*>BC+				{.? @	@@r%   c                    t        j                  |t        j                  | |            t        j                  | t        j
                        gS Nlhsrhs)re   r   r   r   rZ   ro   )largersmallerri   s      r&   _reduction_constraintsr    s;     	iiG6>!BCll62112
 r%   c                x   ~ t        |t        j                  d      }t        |t        j                  d      }t        |t        j                  d      }t	        j
                  |      }t	        j
                  |      }t        ||t        |j                              }t	        j                  |      ||g|||gifS r  )
r0   r   r!   r"   re   r   r  r{   ri   r   )r   r   r   accoutsource_variabler}  reduction_constraintss           r&   &_multi_dim_reduction_constraint_systemr    s    
 
R--q1&"l**A.#"l))1-#KK'/S!,0B 	&;<,c
;
 r%   c                
   ~ t        j                  t        t        j                  d            }t        j                  t        t        j
                  d            }t        t        t        j                  j                  j                        j                        }t        fdt        t        |            D              }t        |||      }t        j                   |      ||j"                  g||j"                  gifS )Nr   c              3  @   K   | ]  }|j                   vs|  y wr8   )broadcast_dimensions).0r  r   s     r&   	<genexpr>z6_broadcast_in_dim_constraint_system.<locals>.<genexpr>  s#      !23J3J*Jas   r   )re   r   r0   r   r"   r!   r{   r   r   r  r   r3   rY   rangerb   r  r   rX   )r   r   r}  r  	out_shaperi   r  s    `     r&   #_broadcast_in_dim_constraint_systemr    s    
 
Yr<+>+>BC,KK	"l.B.BA FG/D		7==>) s9~& . 1O^
 	&;<
O//0
))*
 r%   c                   ~ t        t        t        j                  |j                  j
                        j                        }t        t        t        j                  |j                  j
                        j                        }t        j                  t        |t        j                  d            }t        j                  t        |t        j                  d            }t        j                  |||      }t        j                  |||      }t        j                  t        j                   ||      t        j                   ||      g      ||j"                  g||j"                  gifS )Nr   )source_shapetarget_shaper  r   )r{   r   r   r  r   r3   rY   r   re   r   r0   r   r!   r"   Reshaper   r   rX   )r   r   in_shaper  r  r}  	in_to_out	out_to_ins           r&   _shape_cast_constraint_systemr    s    
4ryy~~6<<=(D		7==>)Ib,*>*>BC+Yr<+>+>BC,  jjy) jj)
 	iiLi8iiKY7 [__%|l6F6F5GH
 r%   c                   ~ t        d |j                  D              rt        d      t        |t        j
                  d      }t        |t        j                  d      }t        j                  |      }t        d |j                  D              }t        j                  ||      t        j                  |t        j                        t        j                  |t        j                        g}t        j                   |      |||gifS )Nc              3  `   K   | ]&  }t        j                  |      j                  d k7   ( ywr   Nr   IntegerAttrrB   r  ss     r&   r  z;_extract_strided_slice_constraint_system.<locals>.<genexpr>=  s$     :!		 	 A	%:s   ,.z`strides` must contain only 1s.r   c              3  Z   K   | ]#  }t        j                  |      j                   % y wr8   r  )r  r*  s     r&   r  z;_extract_strided_slice_constraint_system.<locals>.<genexpr>B  s     >a"..#))>s   )+r   )anyr`  r|  r0   r   r!   r"   re   r   r{   offsetsr   r   rZ   r   ro   r   )r   r   r   r   rq   r  r   s          r&   (_extract_strided_slice_constraint_systemr  8  s     
:rzz::
?
@@b,..2'R,,a0&[[!(>2::>>'jj7# 	ll8R112ll8R334+ 	k2 '6"#	
 r%   c                   i }g }t        |j                        }t        |j                        }g }t        |j                        D ];  \  }}t        |      rtt        j                  t        |t        j                  |            }	|j                  |	       t        j                  t        j                  t        |                  ||	<   t!        |      st        |t        j                  |      }
| j#                  |
      }t        j                  |
      }	|j                  t        j$                  ||	             |j                  |	       t        |      }|
j&                  j(                  }t+        ||      }|||	<   > t        |j,                        }t        |j.                        D ]  \  }}t0        j2                  j5                  |j(                        s0t        j                  t        |t        j6                  |            }	|j                  |	       t        j                  t        j                  t        |                  ||	<    t        j8                  ||      |D 	ci c]  }	|	|	j:                  g c}	fS c c}	w r  )iter
in_layoutsin_transformsr  r=   r   re   r   r0   r   r!   r`   rf   rP  rQ  nextr   r   r   rB   r3   0_extract_smem_tiling_from_custom_transform_attrsout_layoutsr>   r   rD   rE   r"   r   rX   )r   r   r   r   r  r  	variablesr  r   r   r   r   
transformsr^  r   r  r   s                    r&   #_custom_primitive_constraint_systemr  R  s   
 13+%'+BMM"*r''(-!#)bkk* ja
++iL$8$8!<
=aq((

&
&tJ'7
8k!n 
g	 R!5!5q9j##J/j
++j
!azq9:q&j$$f?
Sfk!n36 R^^$+RZZ( ia	}},
++iL$7$7;
<aq((

&
&tK'8
9k!n	 	+{3$%aq155'z%
 %s   -Jc                    t        j                  |       }t        |t        j                        sJ t        j                  |j                  |j                  |j                  |j                        S r8   )rP  rQ  rE   rZ   r   r   r   r   	warp_dims	lane_dims
vector_dim)layout_attrr   s     r&   _tmem_layout_from_layout_attrr    sZ     ''4&	FBNN	++	+			mmV%%v'7'79J9J
 r%   c                   t        |t        j                  d      }| j                  |      }t        |t        j                  d      }t        j                  t        |j                              }t        j                  ||i      |||gifS r  )
r0   r   r!   r   r"   re   r   r  rR  r   )r   r   r   rq   r   rS  s         r&   #_tmem_layout_cast_constraint_systemr    s~    
 b,..2'g&(R,,a0&}}:2==IJ*x&<='6"#
 r%   c                2   ~ t        |t        j                  d      }t        j                  |      }t        |t        j
                  d      }t        j                  |      }|t        j                  d       i}||g||gi}t        j                  |      |fS r  )r0   r   r"   re   r   r!   r   r   )r   r   r   
result_varin_smemin_smem_varr   operands_for_variables           r&   _tmem_alloc_constraint_systemr    s    
 
R,,a0&{{6"*b,..2'G$+2==&1+ &xwiH				57L	LLr%   c                    t        |t        j                  d      }| j                  |      }t	        j
                         ||gifS r	  )r0   r   r!   r   re   r   )r   r   r   rq   s       r&   _tmem_dealloc_constraint_systemr    sC    
 b,..2'g&(				G9 5	55r%   c                   i }i }t        |t        j                  d      }| j                  |      }t	        j
                  |j                  j                        }t        j                  t        |j                        |j                  d      }t        j                  |      ||<   |g||<   t        |j                         rt        |t        j                  d      }t	        j
                  |j                   j                        }	| j                  |      }
dt#        j$                  |	j&                        z  }t        j                  t        |	j                        |j                  |      }t        j                  |      ||
<   |g||
<   |j                  j                  j                  d   }|dk(  r|j                  j(                  s|j*                  j                  j                  d   t#        j$                  |j*                  j                  j&                        dt-        fdt/        t0        j2                        D              }nt0        j2                  j4                  }t7        t	        j8                  |j*                  j                        |      }t        |t        j                  d      }| j                  |      }t        j:                  t=        j>                  |            ||<   |g||<   tA        |j                         rt7        t	        j8                  |j                   j                        t0        j2                  j4                        }t        |t        j                  d      }| j                  |      }
t        j:                  t=        j>                  |            ||
<   |g||
<   t        jB                  |	      |fS )
Nr   r   rt   r   @   r   c              3  >   K   | ]  }d |z  z  z  k  r|  yw)rV  Nr$   )r  r  Nelement_type_bitwidthn_lane_groupss     r&   r  z1_tcgen05_mma_constraint_system.<locals>.<genexpr>  s0      q5))Q--?? 	
s   r   r   )"r0   r   r!   r   r   r  accumulatorr3   r   rz   r{   rY   r|   re   r   r   r=  r   r   r   rB   r>  r  reversedrx   r   r   r   rF   r   r   r   r   r   )r   r   r   r  r  acc_variableacc_type
acc_layoutr=  rk  ry  ru   a_layoutMmax_b_swizzlerm  r>  rx  ro  r  r  r  s                      @@@r&   _tcgen05_mma_constraint_systemr    s   
 13+13 	"l**A.#!!#&,]]2>>../())HNNR]]A* !mmJ7+l),%"$$"l**A.A]]24499%FQEENN6#6#677G))fllR]]GH x0K$%3%  	nn"!"WR]]((
		A!NN24499+A+ABM $,,- M &&66M&r}}RTTYY'?O(L((!,!


1
%}}R%5%5h%?@+e"#"$$(
bddii &&66H 	"l**A.AQEr'7'7'ABK$%3% 				57L	LLr%   c           	        t        |t        j                  d      }| j                  |      }t        |t        j                  d      }t        j                  |      }t        j                  ||t        t        j                  |j                  j                        j                              }t        j                  |g      ||g||gifS r   )r0   r   r!   r   r"   re   r   r   r{   r   r  r   r3   rY   r   r   r   r   r  r  destination_variabler   s          r&   "_async_load_tmem_constraint_systemr    s    
 R--q1&$$V,/"l1115+[1  BMM"))..)//0* 	zl3"6F
 r%   c                    t        |t        j                  d      }| j                  |      }t        |t        j                  d      }t        j                  |      }t        j                         ||g||gifS r	  )r0   r   r!   r   r"   re   r   r   )r   r   r   operand_variabler   result_variables         r&   _slice_tmem_constraint_systemr    sq    
 b,..2'%%g.R,,a0&KK'/'OfX>
 r%   c           	        t        |t        j                  d      }t        j                  |      }t        |t        j                  d      }| j                  |      }t        j                  ||t        t        j                  |j                  j                        j                              }t        j                  |g      ||g||gifS r  )r0   r   r!   re   r   r   r   r{   r   r  r   r3   rY   r   r  s          r&   #_async_store_tmem_constraint_systemr    s    
 R--q1&KK'/"l22A6+))+6  BMM"))..)//0* 	zl3"6F
 r%   c                    ~ t        |t        j                  d      }t        j                  |      }t        j
                         ||gifS r	  )r0   r   r"   re   r   r   )r   r   resres_vars       r&   _slice_smem_constraint_systemr  /  sD    
 
"l))1-#KK'				3% 0	00r%   c                   t        |t        j                  d      }t        |t        j                  d      }| j	                  |      }t        d |j                  D              rt        d|j                   d      g }d}t        |j                        D ]  \  }}|j                  |   }	|	t        j                  j                         k(  r|j                  |   }	|dz  }t        j                  j                  |      rg }lt        j                   |j"                  j$                        }
t'        j(                  ||
j*                  |         }t-        |	t.              rt'        j(                  ||	      }nt1        ||	      }|j3                  |        t5        j6                  |t9        |            g}t5        j:                  |      }||||gifS )Nr   c              3  &   K   | ]	  }|d k7    ywr  r$   r  s     r&   r  z4_memref_subview_constraint_system.<locals>.<genexpr>C  s     +Aa+s   z(Only unit strides are supported but got .r   r   )r0   r   r!   r"   r   r  static_stridesr|  r  static_sizesstatic_offsetsr   r  get_dynamic_sizer  is_dynamic_sizerF   r   r3   r   r:  rY   rE   r4   rA  r`   re   r   r{   r   )r   r   r   r   source_dest_varr   dynamic_offset_indexr  sizeoffsetsrc_typedivisibility_constraintr   r  s                 r&   !_memref_subview_constraint_systemr  :  s   
 R--q1&	2|**A	.$$$V,/+**++

223D3D2EQG 
 /2??+ 6gaq!F//11zz./fa 
}}$$T*oryy~~.h $x~~a/@ A	FC	 "&((+BF"K"-.Ev"N45+6. OU?-CDE+;7&	/FD>2	22r%   c                    t        |t        j                  d      }| j                  |      }t        |t        j                  d      }t        j                         |||gifS r	  )r0   r   r!   r   r"   re   r   )r   r   r   var_source_destr   s        r&   !_memref_cast_op_constraint_systemr  g  sY    
 R--q1&$$V,/	2|**A	.$				64. A	AAr%   c                   t        j                  |j                  j                        }t	        |j
                        dk7  rt        d|       |j                         \  }}t        j                  |j                  j                        j                         \  }}||k7  }t        |t        j                  d      }t        |t        j                  d      }| j                  |      }	|st        j                         |	||gifS t        j                   |      }
t        j"                  t        j$                  |	      |
      t        j"                  |	t        j$                  |
            g}t        j                  |      }||	|g|
|gifS )Nr   z#Only 2D memrefs are supported, got r   r   )r   rF   in_r3   rb   rY   r|  rX  r   r0   r   r!   r"   r   re   r   r   r   	Transpose)r   r   in_ty
in_stridesra  out_strides	transposer   r   r   r   r   r  s                r&   &_memref_transpose_op_constraint_systemr  r  sB   
 --
$%
 CE7K
LL..0-*a==0GGI.+qK')R--q1&	2|**A	.$'*	 :~">>>[[(iiZ((3ii
BLL23+ ;7&	*vh4&9	99r%   c                   t        j                  t        j                  |j                  j
                              rt        d      t        |t        j                  d      }t        |t        j                  d      }| j                  |      }g }t        t        |j                        t        |j                              D ]E  \  }}t        j                   j#                  |      st%        |      dkD  r n|j'                  |       G t)        j*                  |t-        t        |                  g}t)        j.                  |      |||gifS )Nz6Transposed memrefs are not supported in ExpandShapeOp.r   r   r   )r   is_memref_transposedr   rF   r   r3   r|  r0   r   r!   r"   r   zipr  static_output_shapereassociationr  r  rb   r`   re   r   r{   r   )	r   r   r   r   r   reverse_tiling_multipledimrI  r   s	            r&   '_memref_expand_shape_op_equation_systemr    s   
 bffkk :;
@  R--q1&	2|**A	.$ #r%%&1A1A(B (hc3 
}}$$S)SX\ ""3'( Cx0G'H!IJK+				5fd^7L	LLr%   c                   ~ t        j                  |j                  j                        j                  }|r|dgk7  rt        d|       t        |t        j                        rdnd}t        |t        j                  |      }t        j                  |      }|t        j                  d       i}t        j                  |      ||gifS )Nr   z'Only scalar memrefs are supported, got r   r   )r   rF   r   r3   rY   r|  rE   LoadOpr0   r   r!   re   r   r   r   )r   r   	ref_shaperef_op_indexrefr   r   s          r&   '_memref_load_store_op_constraint_systemr	    s     
mmBIINN+11)9#

1)=  !V]]3,"l**L9#
C#14bmmD6I0J+				5cU|	CCr%   c           	        |D cg c]  }t        j                  |       }}|x  r dk(  r  d }d }ny x  r% dk(  r \  xt        j                  d x	\   } |}d }nQ    r= dk(  r8\  xt        j                  d x \   cxt        j
                  d x	\   }}|}|}n   	 t        d|       |&t        | |      }||k7  rt        d| d| d|  d	      t        j                  |      S c c}w )
Nr   r   r$   r   zUnsupported transforms z%Cannot honor caller-provided swizzle z, that is different from the computed swizle z
 for type r  )
rP  from_transform_attrr   r   rx   r   r|  rj  re   r   )	ref_typetransform_attrsxr  tile_transformrd  tr  computed_swizzles	            r&   r  r    s    =LLq//2L*L	ng 
 
#	"
!"


	"ng 
# 
>	=
!"



!#<4#5#5#7	=ng 
> 
"9* FGG'.A7"1' ;&&6%7z(1N 
 
~	&&- Ms   C)c                $   t        |t        j                  d      }t        |t        j                  d      }| j	                  |      }t        |j                  j                  |j                        }||i}t        j                  |      |||gifS r  )r0   r   r!   r"   r   r  r  r3   r  re   r   )r   r   r   r   r   r   r   s          r&   "_with_transforms_constraint_systemr    s|    
 R--q1&	2|**A	.$ #;BFFKKW&14f+				5fd^7L	LLr%   c                   g }t        |j                  |j                  d      D ]&  \  }}|dk(  r|j                  t	        ||             ( t        |t        j                        rdnd}t        |t        j                  |      }| j                  |      }t        j                  |t        |            g}t        j                  |      ||gifS )NT)strictrV   r   r   )r   r   r   )r  slice_lengthsindicesr`   rA  rE   rx   AsyncLoadOpr0   r   r!   r   re   r   r{   r   )	r   r   r   r  r5   operand_indexr   r   r   s	            r&   #_async_load_store_constraint_systemr    s     /))2::dC 5kdErz;tU34	5 ""d&6&67!Q-b,..>'!#eO6LMN+				5gY7G	GGr%   c                   t        j                  |       rt        t        dd|        t        j                  |       rt        t
        dd|        t        j                  |       rt        t         j                  dd|        y y )Nr   r   tmem_layoutszTMEM refr  zSMEM ref)r   should_have_layout_ensure_right_number_of_layoutsr   should_have_tmem_layoutr   should_have_transformsis_transformable_smem_memrefr   s    r&   _ensure_all_layouts_are_setr#    se    ''+#Iy(BG,,R0#L.*bQ++B/#44lJPR 0r%   c                |   fd} |d|       } |d|       }t        t        | j                              }t        |      |k7  r#t	        d| dt        |       d| d| d 
      t        t        | j
                              }t        |      |k7  r#t	        d	| dt        |       d| d
| d 
      y)zEnsures that the right number of in/out layouts are provided for an op.

  Layouts here are can be vector layouts, TMEM layouts, or SMEM transforms.
  c                B    | j                   v rj                   |    S g S r8   )
attributes)attrr   s    r&   <lambda>z1_ensure_right_number_of_layouts.<locals>.<lambda>  s    0Et, 2 r%   r  out_zExpected the same number of in_z (z) as z operands (z	). op=
  z Expected the same number of out_z
 results (N)summapr=   rb   rI   r>   )		filter_fnattr_suffix
value_typer   r   r  r  num_matching_operandsnum_matching_resultss	      `     r&   r  r  
  s     N'[M*+*${m,-+c)R[[9:_--

)+bZ8I,k"7!8
2$	H  SBJJ78--

*;-r#k:J9K L\$8#9B4	I  .r%   c                    |t         j                  j                  S t        j                  j                  |       st        d|  d      t        j                  |       }|j                         \  }}|j                  }t        |      t        |      kD  r$t        dt        |       dt        |       d      |t        j                  |t        |       d          }|t        j                  |j                        z  }|t         j                  j                  t         j                  j                   t         j                  j"                  t         j                  j                  fv sJ t        j                  |      S )zCComputes the swizzle mode given a tiling transform and a data type.NzExpected a MemRefType, got r  zThe tile rank (z)) cannot be greater than the ref's rank (z).)rx   r   r]  r   rF   rE   rI   rX  r   rb   rY  rZ  r   rW  r   r   r[  r\  )r3   r  r^  r`  ra  r   rf  rd  s           r&   rj  rj  &  sL    (((		!	!$	'
24&:
;;==&,,.*'1  &[3w<

#f+ '\N"	 
 		'3v;,-"89:,5??6+>+>??'	
((
''
''
##	 
  
 
		G	$$r%   c                  "    e Zd ZU ded<   ded<   y)_TypeAndLayoutir.Typer3   zcs.Constantr   N)r   r   r   rS   r$   r%   r&   r3  r3  E  s    -
r%   r3  c           	     v
   t        | j                         d       }t        j                  |d       }|D ]  \  }}t        |d       }t        j                  |d       D ci c]  \  }}|t	        |       }}}|j                  t        j                  g       }	|j                  t        j                  g       }
d }t        |	|      D cg c]%  \  }}t        |j                  j                  |      ' }}}t        |
|      D cg c]%  \  }}t        |j                  j                  |      ' }}}|D cg c]<  }t        |j                  t        j                        r|j                  j                  > }}|D cg c]<  }t        |j                  t        j                        r|j                  j                  > }}|D cg c]=  }t        |j                  t        j                         s(|j                  j                  ? }}|D cg c]<  }t        |j                  t        j                         r|j                  j                  > }}|D cg c])  }t        |j                  t        j"                        s(|+ }}|D cg c])  }t        |j                  t        j"                        s(|+ }}t%        j&                  |      rN|D cg c]  }t)        j*                  |       }}t,        j.                  j                  |      |j0                  d<   t%        j2                  |      rN|D cg c]  }t)        j*                  |       }}t,        j.                  j                  |      |j0                  d<   t%        j4                  |      rN|D cg c]  }t)        j*                  |       }}t,        j.                  j                  |      |j0                  d	<   t%        j6                  |      rN|D cg c]  }t)        j*                  |       }}t,        j.                  j                  |      |j0                  d
<   	 	 	 	 dd}t%        j8                  |      r4 ||      }t,        j.                  j                  |      |j0                  d<   t%        j:                  |      r4 ||      }t,        j.                  j                  |      |j0                  d<   t=        |        yc c}}w c c}}w c c}}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w )a  Assigns the layouts in `solution` to the MLIR ops they belong to.

  This function requires that, for each MLIR op that appears in `solution`,
  `solution` contains a layout assignment for all of its `vector`, TMEM, and
  SMEM operands and results. Block arguments are ignored.
  c                2    t        | d   j                        S r	  )idr2   kvs    r&   r(  z assign_layouts.<locals>.<lambda>S  s    r"Q%//': r%   )rX   c                     | d   j                   S r	  )r2   r8  s    r&   r(  z assign_layouts.<locals>.<lambda>V  s    BqEOO r%   c                     | d   j                   S r	  r3   r8  s    r&   r(  z assign_layouts.<locals>.<lambda>Z  s    BqEJJ r%   c                     | d   j                   S r	  r<  r8  s    r&   r(  z assign_layouts.<locals>.<lambda>^  s    r!uzz r%   c                     | d   j                   S r	  )r5   r8  s    r&   r(  z assign_layouts.<locals>.<lambda>e  s    r!u{{ r%   r  r  in_tmem_layoutsout_tmem_layoutsc                   g }| D ]  }t        |j                  t        j                        sJ g }|j                  j                  |j                  t        j                  |j                  j                               t        |j                  |j                  j                        }|j                  t        j                  |             |j                  t        j                  j                  |              |S r8   )rE   r   re   r   rB   r`   rP  to_transform_attrrj  r3   r   	ArrayAttrr   )r  	all_attrstlattrsrd  s        r&   _to_transform_attrsz+assign_layouts.<locals>._to_transform_attrs  s     ')i 2""))R]]33399??&
,,{44RYY__E
F$RWWbiioo>'
,,{44W=
>))%012 r%   r  out_transformsN)r  zlist[_TypeAndLayout]rQ   zlist[ir.ArrayAttr])sortedr   	itertoolsgroupbylistr   r   r!   r"   r3  rB   r3   rE   r   re   rf   r   r   r   should_have_in_layoutrP  to_layout_attrr   rC  r&  should_have_out_layoutshould_have_in_tmem_layoutshould_have_out_tmem_layoutshould_have_in_transformsshould_have_out_transformsr#  )r   solution_sorted_by_opsolution_per_opr   r   assignments_sorted_by_typetyrO   assignments_by_typein_assignmentsout_assignmentsr5   r   cein_tlsout_tlsrE  r  r  r?  r@  r  rH  lrF  rG  s                             r&   assign_layoutsr_  K  sv    !nn: %%!;/ ) Q$ob+!'9N!O #**&,A
B 	DK  ),,\-A-A2FN)--l.A.A2FO"E N6Ar 	qww||R(F  O7Ar 	qww||R(G  bii!2!23 			J  bii!2!23 			K  #)Jryy"--,P		O 
 bii/ 			  z"))R]]CM  
299bmm DN  ,,R06@A{))!,AeA$&LL$4$4U$;bmmL!--b16AB{))!,BeB%'\\%5%5e%<bmmM"11"56EF{))!,FeF)+)9)9%)@bmm%&22266FG{))!,GeG*,,,*:*:5*Abmm&'(	 004!-0e')||'7'7'>bmmO$11"5!.1e(*(8(8(?bmm$%#cQ$




 B C G Hsc   'S7*S=*TAT	AT)T9TAT)TT)T"<T"T'>T,!T1T6c                j   t        | j                        D cg c]+  \  }}t        |      rt        | t        j
                  |      - }}}|j                  t        | j                        D cg c]+  \  }}t        |      rt        | t        j                  |      - c}}       |S c c}}w c c}}w )z=Returns all the vector operands and results for the given op.)	r  r=   r   r0   r   r!   r   r>   r"   )r   r  r*  r   s       r&   r   r     s     BKK(
!Q	1 L((!,+ 
 BJJ'
!Q	1 L''+ 
 

s   0B)/0B/
c                   | j                   t        j                  k(  sJ | j                  }|j                  }t        |t        j                        rIt        |j                        j                  |      }t        |j                  t        j                  |      S t        |t        j                        rt        |j                        j                  |      }t        |j                  j                         j                  |j"                        }t        |j                  t        j$                  ||      S t'        d| dt        |       d      )zGiven an operand, returns the corresponding result in its producer.

  When the producer is a block, we return the corresponding operand in the
  operation that owns the block.
  z	Producer z" is not an operation nor a block: r  )r3   r   r!   rB   rv   rE   r   r8  rL  r>   r5   r0   rw   r"   BlockrA   r?   r&  r#   	TypeError)r   rB   producerr5   r6   s        r&   r   r     s     
--	--	-
--%[[(",,'!!"((/EX__l&9&95AA"((###$**51E../55hooFLX^^\%:%:E<PP(=d8n=MQO	 r%   c                4   | j                   t        j                  t        j                  fv sJ g }| j                  j
                  D ]N  }|j                  j                  }|j                  }|j                  t        |t        j                  |             P |S )zSGiven a result or an argument, returns the corresponding operands in its consumers.)r3   r   r"   r#   rB   usesrv   rw   operand_numberr`   r0   r!   )r   consumer_operandsuseconsumerr5   s        r&   rh  rh    s    	,,l.C.CD	DD	D') \\ OcyyHEYx1E1EuMNO 
r%   c           
     >   g }i }| j                         D ]:  \  }}|D ]  }||v st        | d| d||           ||D ci c]  }|| c}z  }< t               }| j                         D ](  \  }}g }g }	|D ]  }|j                  t        j
                  k7  r"|j                  t        j                  k(  rLt        |      }
||
   }|j                  |       ||vse|j                  t        j                  ||             |j                  t        j                  t        j                  fv st        |      D ]B  }||   }|	j                  |       ||vs|j                  t        j                  ||             D 
 |j!                  |       + |S c c}w )z=Derives relayout constraints from the given variable mapping.z is mapped to both z and )r   rI   setrJ   r(   r+   r3   r   r!   r   r`   re   r   r"   r#   rh  add)r   r   r   rq   r   r   r   visited	producers	consumersprproducer_variablecoconsumer_variables                 r&   derive_relayout_constraintsru    s    $&+:<7==? Bh! 

	.	.l-hZu&z235
 	

 [A8AAB "e'7==? h#%I#%I! I
		 	 KOO	3	L00	0Z(3B7*+ G+


R[[):HE
F??|22L4I4IJJ#J/ 	IB5b9



,
- g-r{{85FGH	II. KK56 
=  Bs   
Fc                V    t        | t        j                  t        j                  f      S r8   )rE   r   r'  rE  r"  s    r&   is_terminatorrw    s    	Bcoo6	77r%   c                     ||        t        | t        j                        s@| j                  j                  D ]&  }|D ]  }|j
                  D ]  }t        ||        ! ( yy)zTraverses the operation and applies the callback in pre-order fashion.

  Skips recursing into `mgpu.CustomPrimitiveOp`s, and assumes that the values
  iterated on are not being modified.
  N)rE   rx   CustomPrimitiveOpr2   r?   r  traverse_op)r   callbackr&  r  block_ops        r&   rz  rz    si     2, 
B..	/,,&& * *%(( 	*H
h
)	*** 
0r%   r   c          	        t        j                         t               d
fd}| j                  D ]  }t	        ||        t        t         j                        rt        d      t        j                        }t        j                  |      z  t        t         j                        rJ t        j                        t        t         j                        rJ t        j                        t        t        j                  j                               |      \  }}t        j                   d      rt#        d||z
   d| d       t        |t         j                        rt        d      j                  j%                         D 	ci c]  \  }}|D ]  }	|	||   
  }
}}}	t'        |
       | j                  D ]  }t	        |t(                y	c c}	}}w )a  Infers layouts for the given module.

  * If there are vector (respectively SMEM refs, TMEM refs) operands,
  `in_layouts` (respectively `in_transforms`, `in_tmem_layouts`) will be set and
  contain one element per relevant argument in the memory space.
  * If there are vector (respectively SMEM refs, TMEM refs) outputs,
  `out_layouts` (respectively `out_transforms`, `out_tmem_layouts`) will be set
  and contain one element per relevant argument in the memory space.
  * Any of these attributes is guaranteed to not be set if there is no relevant
  input/output in the corresponding memory space.

  The fuel is provided in order to limit the number of attempts made by the
  solver.
  c                N   t        |       ry t        j                  |       xs, t        j                  |       xs t        j                  |       }|sy t
        j                  | j                  d       }|t        d|         ||       \  }}j                  |       |z  y )Nz%No layout inference rule defined for )
rw  r   r  r  r   r   r   r   r|  r   )r   r  r   r   r   r   global_constraint_systems        r&   gather_constraintsz(infer_layout.<locals>.gather_constraints:  s    
 R**2. 	62226	611"5 
 .2223D3DdKD|"Gt LMM!%c2wJJw 11r%   zsFailed to infer a possible set of layouts. This should only happen if user-provided layout casts are unsatisfiable.r   r   r   zDFinding a solution (or exhausting the entire search space) consumed /z fuel.N)r   zir.Operation)re   r   r   bodyrz  rE   r   rI   ru  r   saturate_distinct_from_splat+saturate_divides_constraints_for_equal_varsr   rL  r   r	   
vlog_is_onprintr   r_  r#  )moduler   r  r   r   r   remaining_fuelr   ksr   layout_for_value_siter   r  s              @@r&   infer_layoutr  %  s   $  002#2, KK (b&'( ("*:*:;
	8 
 ,C,H,HI+b11kJJ0"2B2BCC	C  << 0"2B2BCC	CKK
 2
3'',,./(N 	 ^+,AdV6; < "**+
	8  //557 
!R  !n  &' KK 1b/01s   G!)rg   zcs.RegisterLayoutrh   r   ri   tuple[int, ...]rQ   zIterator[cs.RegisterLayout])rq   r   rQ   zfa.WGStridedFragLayout | None)rq   r   rQ   ztcgen05.TMEMLayout | None)r   z
cs.Dividesr   r4   rQ   )Iterator[tuple[cs.Variable, cs.Constant]])r   zcs.IsTransferabler   dict[cs.Variable, cs.Divides]rQ   r  )r   Sequence[cs.Constraint]rQ   r  )r   r  rQ   r  )r   Sequence[cs.Variable]r   cs.ConstraintSystemrQ   r  )r   r  r   r  r   r4   rQ   z=tuple[dict[cs.Variable, cs.Constant] | cs.Unsatisfiable, int])r   type[ir.OpView])r   rR   rQ   bool)r   r   r   r1   rQ   1tuple[cs.ConstraintSystem, ValueSitesForVariable])r   r   r   zmgpu.VectorLoadOprQ   r  )r   r   r   zmgpu.VectorStoreOprQ   r  )r   r   r   zmgpu.DebugPrintOprQ   r  )r   r   r   zmgpu.PrintLayoutOprQ   r  )r   r   r   zmgpu.BroadcastedIotaOprQ   r  )r   r   r  zarith.ConstantOprQ   r  )r  zir.Blockr   r  rQ   r1   )r   r   r   z	scf.ForOprQ   r  )r1  r4   rQ   z	list[int])r=  r4   r>  rR   rQ   r4   )r   r   r   zscf.WhileOprQ   r  )r   r   r   zscf.IndexSwitchOprQ   r  )r   r   r   zmgpu.LayoutCastOprQ   r  )r^  ir.MemRefTyper   mgpu.SwizzlingModerQ   ztuple[int, int])rk  r4  rl  r  rQ   z.tuple[tuple[int, int] | None, tuple[int, int]])r   r   r   zmgpu.WGMMAOprQ   r  )r   r   r   zvector.BroadcastOprQ   r  )r   r   r   zvector.ReductionOprQ   r  )r  r   r  r   ri   r  rQ   zlist[cs.Constraint])r   r   r   zvector.MultiDimReductionOprQ   r  )r   r   r   zmgpu.BroadcastInDimOprQ   r  )r   r   r   zvector.ShapeCastOprQ   r  )r   r   r   zvector.ExtractStridedSliceOprQ   r  )r   r   r   zmgpu.CustomPrimitiveOprQ   r  )r  zmgpu.TiledLayoutrQ   ztcgen05.TMEMLayout)r   r   r   zmgpu.TmemLayoutCastOprQ   r  )r   r   r   zmgpu.TmemAllocOprQ   r  )r   r   r   zmgpu.TmemDeallocOprQ   r  )r   r   r   zmgpu.TcGen05MMAOprQ   r  )r   r   r   zmgpu.AsyncLoadTmemOprQ   r  )r   r   r   zmgpu.SliceTmemOprQ   r  )r   r   r   zmgpu.AsyncStoreTmemOprQ   r  )r   r   r   zmgpu.SliceSMEMOprQ   r  )r   r   r   zmemref.SubViewOprQ   r  )r   r   r   zmemref.CastOprQ   r  )r   r   r   zmemref.TransposeOprQ   r  )r   r   r   zmemref.ExpandShapeOprQ   r  )r   r   r   zmemref.LoadOp | memref.StoreOprQ   r  )r  r  r  zir.ArrayAttrrQ   zcs.SMEMTiling)r   r   r   zmgpu.WithTransformsOprQ   r  )r   r   r   z$mgpu.AsyncLoadOp | mgpu.AsyncStoreOprQ   r  )r   r1   rQ   r   )
r,  zCallable[[ir.Value], bool]r-  rN   r.  rN   r   r1   rQ   r   )r3   r4  r  zlc.TileTransform | NonerQ   r  )r   zdict[ValueSite, cs.Constant]rQ   r   )r   r1   rQ   zlist[ValueSite])r   r0   rQ   r0   )r   r0   rQ   zSequence[ValueSite])r   r   rQ   zlist[cs.Relayout])r   r1   rQ   r  )r   r1   r{  zCallable[[ir.OpView], None])r  z	ir.Moduler   r4   )r    
__future__r   collections.abcr   r   r   r   r)   rJ  r   retypingr   r   abslr	   jax._src.libr
   rx   jax._src.lib.mlirr   jax._src.lib.mlir.dialectsr   	mlir_mathr   r   r   numpyrY   r   re   r   rZ   r   r   r   r   rP  r   r   _DEFAULT_LAYOUT_INFERENCE_FUELIntEnumr   Enumr(   compilerL   	dataclassr0   rm   rr   r~   r   r   r   r   r   r   r   r   r   rL  r   OpViewr{   r   r   r   rS   r   r   r   r   r   AddIOpAddFOpAndIOp	BitcastOpCmpFOpCmpIOpExtFOpExtSIOpExtUIOpFPToSIOpFPToUIOp
MaximumFOpMaxUIOpMaxSIOp
MinimumFOpMinUIOpMinSIOpMulIOpMulFOpOrIOpFloorDivSIOpDivUIOpDivFOpRemUIOpRemSIOpRemFOpSIToFPOpUIToFPOpSubIOpSubFOpTruncFOpTruncIOpXOrIOpExpOpExp2OpSinOpCosOpLogOpRsqrtOpTanhOpr   VectorLoadOpr  VectorStoreOpr  DebugPrintOpr
  PrintLayoutOpr  BroadcastedIotaOpr  OptimizationBarrierOpr  BroadcastOpr  r9  r  r"  ForOpr/  r4  rA  WhileOprL  IndexSwitchOprN  LayoutCastOprT  r   rq  WGMMAOprz  r~  ReductionOpr  r  MultiDimReductionOpr  BroadcastInDimOpr  ShapeCastOpr  ExtractStridedSliceOpr  ry  r  r  TmemLayoutCastOpr  ry   r  TmemDeallocOpr  TcGen05MMAOpr  AsyncLoadTmemOpr  SliceTmemOpr  AsyncStoreTmemOpr  SliceSMEMOpr  	SubViewOpr  CastOpr  TransposeOpr  ExpandShapeOpr  r  StoreOpr	  r  WithTransformsOpr  r  AsyncStoreOpr  r#  r  rj  r3  r_  r   r   rh  ru  rw  rz  r  r$   r%   r&   <module>r     s   K # 8 8     	 %  3   , 8 - * -   $  " $    ") 4<< $))  -.d#40 40 $40n))) $) !	)077"7


	H!	H36	H.	H46!46!>46 /46n
(
"
(.*1S#1S*1S /1ShG"#G"*G" 	G"
 CG"T B B B4 R[[$y/9:  "*		"	"

4
457"   $ & 
*CC<	<< 7<)	LL)	LL) 
LL) 
OO	)
 
LL) 
LL) 
LL) 
MM) 
MM) 
NN) 
NN) 
) 
MM) 
MM) 
)  
MM!)" 
MM#)$ 
LL%)& 
LL')( 
KK))* 
+), 
MM-). 
LL/)0 
MM1)2 
MM3)4 
LL5)6 
NN7)8 
NN9): 
LL;)< 
LL=)> 
NN?)@ 
NNA)B 
LLC)D OOE)F G)H OOI)J OOK)L OOM)N O)P Q) *NBT -(,-LMU*NZ ((9(9:*	** 7* ;*6 ((:(:;*	** 7* <*6 ((9(9:>	>> 7> ;> ((:(:;/	// 7/ </ ((>(>?F	FF 7F @F ((B(BC9	99 79 D9& ((:(:;&	&& 7& <& ((8(89&	&!& 7& :&0		*9		 (		29	99 79 398* (4#9	#9#9 7#9 5#9L ((9(9:9	99 79 ;9* ((9(9:	 7 ;(:D*38 (5 D	 D D 7 D 6 DF ((:(:;	 7 <" ((:(:;A	AA 7A <A			 $	 		 ((B(BC	" 7 D2 ((=(=>	 7 ?0 ((:(:;'	' 2'6' <'T ((D(DE	 <6 F2 ((>(>?/	// 7/ @/d! ((=(=>	 7 ? ((8(89M	MM 7M :M  ((:(:;6	66 76 <6 ((9(9:;M	;M;M 7;M ;;M| ((<(<=	 7 >& ((8(89	 7 : ((=(=>	 7 ?& ((8(891	11 71 :1 ((8(89)3	)3)3 7)3 :)3X (6B	BB 7B 7B ((:(:;:	:: 7: <:6 ((<(<=M	MM 7M >M: (6'7D	D&D 7D 8 7D&''!' ': ((=(=>	M		M	M 7	M ?	M ((8(89'(9(9:H	H,H 7H ; :H$)  		
 
8%
%#:%%> d#  $
_$D.
+3++\8**)*( 'E^1^1 #^1r%   