
# Autogenerated by mlir-tblgen; don't manually edit.

from jaxlib.mlir.dialects._ods_common import _cext as _ods_cext
from jaxlib.mlir.dialects._ods_common import (
    equally_sized_accessor as _ods_equally_sized_accessor,
    get_default_loc_context as _ods_get_default_loc_context,
    get_op_results_or_values as _get_op_results_or_values,
    segmented_accessor as _ods_segmented_accessor,
)
_ods_ir = _ods_cext.ir
_ods_cext.globals.register_traceback_file_exclusion(__file__)

import builtins
from typing import Sequence as _Sequence, Union as _Union, Optional as _Optional


@_ods_cext.register_dialect
class _Dialect(_ods_ir.Dialect):
  DIALECT_NAMESPACE = "mosaic_gpu"

@_ods_cext.register_operation(_Dialect)
class ArriveExpectTxOp(_ods_ir.OpView):
  OPERATION_NAME = "mosaic_gpu.arrive_expect_tx"

  _ODS_REGIONS = (0, True)

  def __init__(self, barrier, expect_tx, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(barrier)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["expect_tx"] = (expect_tx if (
    isinstance(expect_tx, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('I32Attr')) else
      _ods_ir.AttrBuilder.get('I32Attr')(expect_tx, context=_ods_context))
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def barrier(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[0]

  @builtins.property
  def expect_tx(self) -> _ods_ir.IntegerAttr:
    return self.operation.attributes["expect_tx"]

  @expect_tx.setter
  def expect_tx(self, value: _ods_ir.IntegerAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["expect_tx"] = value

def arrive_expect_tx(barrier, expect_tx, *, loc=None, ip=None) -> ArriveExpectTxOp:
  return ArriveExpectTxOp(barrier=barrier, expect_tx=expect_tx, loc=loc, ip=ip)

@_ods_cext.register_operation(_Dialect)
class ArriveOp(_ods_ir.OpView):
  OPERATION_NAME = "mosaic_gpu.arrive"

  _ODS_REGIONS = (0, True)

  def __init__(self, barrier, orders_tensor_core, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(barrier)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["orders_tensor_core"] = (orders_tensor_core if (
    isinstance(orders_tensor_core, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('BoolAttr')) else
      _ods_ir.AttrBuilder.get('BoolAttr')(orders_tensor_core, context=_ods_context))
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def barrier(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[0]

  @builtins.property
  def orders_tensor_core(self) -> _ods_ir.BoolAttr:
    return self.operation.attributes["orders_tensor_core"]

  @orders_tensor_core.setter
  def orders_tensor_core(self, value: _ods_ir.BoolAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["orders_tensor_core"] = value

def arrive(barrier, orders_tensor_core, *, loc=None, ip=None) -> ArriveOp:
  return ArriveOp(barrier=barrier, orders_tensor_core=orders_tensor_core, loc=loc, ip=ip)

@_ods_cext.register_operation(_Dialect)
class AsyncLoadOp(_ods_ir.OpView):
  r"""
   Schedules an async copy of the contents of the `source` MemRef in GMEM to
   the `destination` MemRef in SMEM. The `destination` MemRef in SMEM must be
   contiguous.
  
   Upon completion of the copy, the `complete-tx(complete-count)` operation
   will always be executed on the provided `barrier`.
  
   The `indices` and `slice_lengths` inputs define what slice of the GMEM
   `source` corresponds to the SMEM `destination`. Both `indices` and
   `slice_lengths` must have a length equal to the rank of the `source`. The
   values in `indices` are the starting indices of each dimension and the
   values in `slice_lengths` are the lengths. Providing -1 in `slice_lengths`
   indicates that the slice length is 1 and that the corresponding dimension
   should be collapsed and does not appear in the `destination` MemRef.
  
   The data is written in row-major order to the contiguous SMEM `destination`.
   The `source` data does not need to be contiguous, except for the last
   (and minor-most) dimension.
  
   The `collective` attribute can be provided to use TMA multicast to more
   efficiently load the GMEM data in cases where multiple thread blocks are
   grouped together in a cluster and need to load the same data. Each block in
   a cluster will first load a slice from GMEM to SMEM and then the slices will
   be multicast to all other blocks in the cluster. In this way TMA multicast
   guarantees L2 cache hits. The `collective` attribute is the list of
   cluster dimensions along which to partition the input data loads.
  
   The `predicate` allows scheduling the transfer conditionally. The async copy
  is always scheduled by at most a single lane in the warpgroup.
  """

  OPERATION_NAME = "mosaic_gpu.async_load"

  _ODS_OPERAND_SEGMENTS = [1,1,1,-1,0,]

  _ODS_REGIONS = (0, True)

  def __init__(self, source, destination, barrier, indices, slice_lengths, collective, *, predicate=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(source)
    operands.append(destination)
    operands.append(barrier)
    operands.append(_get_op_results_or_values(indices))
    operands.append(predicate)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["slice_lengths"] = (slice_lengths if (
    isinstance(slice_lengths, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('DenseI64ArrayAttr')) else
      _ods_ir.AttrBuilder.get('DenseI64ArrayAttr')(slice_lengths, context=_ods_context))
    attributes["collective"] = (collective if (
    isinstance(collective, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('anonymous_811')) else
      _ods_ir.AttrBuilder.get('anonymous_811')(collective, context=_ods_context))
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def source(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 0)
    return operand_range[0]

  @builtins.property
  def destination(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 1)
    return operand_range[0]

  @builtins.property
  def barrier(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 2)
    return operand_range[0]

  @builtins.property
  def indices(self) -> _ods_ir.OpOperandList:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 3)
    return operand_range

  @builtins.property
  def predicate(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 4)
    return operand_range[0] if len(operand_range) > 0 else None

  @builtins.property
  def slice_lengths(self) -> _ods_ir.DenseI64ArrayAttr:
    return self.operation.attributes["slice_lengths"]

  @slice_lengths.setter
  def slice_lengths(self, value: _ods_ir.DenseI64ArrayAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["slice_lengths"] = value

  @builtins.property
  def collective(self) -> _ods_ir.ArrayAttr:
    return self.operation.attributes["collective"]

  @collective.setter
  def collective(self, value: _ods_ir.ArrayAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["collective"] = value

def async_load(source, destination, barrier, indices, slice_lengths, collective, *, predicate=None, loc=None, ip=None) -> AsyncLoadOp:
  return AsyncLoadOp(source=source, destination=destination, barrier=barrier, indices=indices, slice_lengths=slice_lengths, collective=collective, predicate=predicate, loc=loc, ip=ip)
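
# A hedged usage sketch (not part of the generated bindings) of the
# `async_load` builder above. `src_gmem`, `dst_smem`, `barrier_ref`, the index
# values and `collective_attr` are hypothetical values that would have to be
# produced by the surrounding IR; the slice lengths are illustrative only.
#
#   async_load(
#       source=src_gmem,           # GMEM memref, rank 2 in this sketch
#       destination=dst_smem,      # contiguous SMEM memref
#       barrier=barrier_ref,       # barrier receiving the complete-tx
#       indices=[i0, i1],          # one starting index per source dimension
#       slice_lengths=[128, 64],   # -1 would collapse a size-1 dimension
#       collective=collective_attr,  # pre-built ArrayAttr of cluster dims
#   )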

@_ods_cext.register_operation(_Dialect)
class AsyncLoadTmemOp(_ods_ir.OpView):
  OPERATION_NAME = "mosaic_gpu.async_load_tmem"

  _ODS_REGIONS = (0, True)

  def __init__(self, source, *, results=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(source)
    _ods_context = _ods_get_default_loc_context(loc)
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def source(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[0]

def async_load_tmem(source, *, results=None, loc=None, ip=None) -> _ods_ir.OpResult:
  return AsyncLoadTmemOp(source=source, results=results, loc=loc, ip=ip).result

@_ods_cext.register_operation(_Dialect)
class AsyncPrefetchOp(_ods_ir.OpView):
  r"""
  Schedules an async prefetch of the contents of the `source` MemRef in GMEM
  to the L2 cache, making subsequent loads of the same data from GMEM faster.
  
  The `indices` and `slice_lengths` inputs define what slice of the GMEM
  `source` is going to be prefetched. Both `indices` and `slice_lengths` must
  have a length equal to the rank of the `source`. The values in `indices` are
  the starting indices of each dimension and the values in `slice_lengths` are
  the lengths. Providing -1 in `slice_lengths` indicates that the slice length
  is 1.
  
  The `collective` attribute can be provided to partition the prefetch over
  multiple blocks in a cluster.
  
  The `predicate` allows scheduling the prefetch conditionally.
  """

  OPERATION_NAME = "mosaic_gpu.async_prefetch"

  _ODS_OPERAND_SEGMENTS = [1,-1,0,]

  _ODS_REGIONS = (0, True)

  def __init__(self, source, indices, slice_lengths, collective, *, predicate=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(source)
    operands.append(_get_op_results_or_values(indices))
    operands.append(predicate)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["slice_lengths"] = (slice_lengths if (
    isinstance(slice_lengths, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('DenseI64ArrayAttr')) else
      _ods_ir.AttrBuilder.get('DenseI64ArrayAttr')(slice_lengths, context=_ods_context))
    attributes["collective"] = (collective if (
    isinstance(collective, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('anonymous_811')) else
      _ods_ir.AttrBuilder.get('anonymous_811')(collective, context=_ods_context))
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def source(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 0)
    return operand_range[0]

  @builtins.property
  def indices(self) -> _ods_ir.OpOperandList:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 1)
    return operand_range

  @builtins.property
  def predicate(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 2)
    return operand_range[0] if len(operand_range) > 0 else None

  @builtins.property
  def slice_lengths(self) -> _ods_ir.DenseI64ArrayAttr:
    return self.operation.attributes["slice_lengths"]

  @slice_lengths.setter
  def slice_lengths(self, value: _ods_ir.DenseI64ArrayAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["slice_lengths"] = value

  @builtins.property
  def collective(self) -> _ods_ir.ArrayAttr:
    return self.operation.attributes["collective"]

  @collective.setter
  def collective(self, value: _ods_ir.ArrayAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["collective"] = value

def async_prefetch(source, indices, slice_lengths, collective, *, predicate=None, loc=None, ip=None) -> AsyncPrefetchOp:
  return AsyncPrefetchOp(source=source, indices=indices, slice_lengths=slice_lengths, collective=collective, predicate=predicate, loc=loc, ip=ip)
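
# A hedged usage sketch (not part of the generated bindings), mirroring the
# `async_load` sketch above but without a destination or barrier; all values
# are hypothetical:
#
#   async_prefetch(source=src_gmem, indices=[i0, i1],
#                  slice_lengths=[128, 64], collective=collective_attr)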

@_ods_cext.register_operation(_Dialect)
class AsyncStoreOp(_ods_ir.OpView):
  r"""
  Schedules an async store of the contents of the `source` MemRef in SMEM to
  the `destination` MemRef in GMEM. The `source` MemRef in SMEM must be
  contiguous.
  
  The `indices` and `slice_lengths` inputs define what slice of the GMEM
  `destination` corresponds to the SMEM `source`. Both `indices` and
  `slice_lengths` must have a length equal to the rank of the `destination`.
  The values in `indices` are the starting indices of each dimension and the
  values in `slice_lengths` are the lengths. Providing -1 in `slice_lengths`
  indicates that this dimension is collapsed in the `source` and needs to be
  expanded to a slice of size 1 in the `destination`.
  
  The data is written in row-major order to the GMEM `destination`. The
  `source` data in SMEM needs to be contiguous, but the `destination` GMEM
  does not.
  
  The `predicate` allows scheduling the transfer conditionally. The async copy
  is always scheduled by at most a single lane in the warpgroup.
  
  The `reduction_op` attribute can be provided to perform a reduction when
  storing to GMEM. For example, using `add` will add the SMEM values to
  existing values in GMEM.
  """

  OPERATION_NAME = "mosaic_gpu.async_store"

  _ODS_OPERAND_SEGMENTS = [1,1,-1,0,]

  _ODS_REGIONS = (0, True)

  def __init__(self, source, destination, indices, slice_lengths, *, predicate=None, commit_group=None, reduction_op=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(source)
    operands.append(destination)
    operands.append(_get_op_results_or_values(indices))
    operands.append(predicate)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["slice_lengths"] = (slice_lengths if (
    isinstance(slice_lengths, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('DenseI64ArrayAttr')) else
      _ods_ir.AttrBuilder.get('DenseI64ArrayAttr')(slice_lengths, context=_ods_context))
    if commit_group is not None: attributes["commit_group"] = (commit_group if (
        isinstance(commit_group, _ods_ir.Attribute) or
        not _ods_ir.AttrBuilder.contains('BoolAttr')) else
          _ods_ir.AttrBuilder.get('BoolAttr')(commit_group, context=_ods_context))
    if reduction_op is not None: attributes["reduction_op"] = (reduction_op if (
        isinstance(reduction_op, _ods_ir.Attribute) or
        not _ods_ir.AttrBuilder.contains('MosaicGPU_TMAReduction')) else
          _ods_ir.AttrBuilder.get('MosaicGPU_TMAReduction')(reduction_op, context=_ods_context))
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def source(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 0)
    return operand_range[0]

  @builtins.property
  def destination(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 1)
    return operand_range[0]

  @builtins.property
  def indices(self) -> _ods_ir.OpOperandList:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 2)
    return operand_range

  @builtins.property
  def predicate(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 3)
    return operand_range[0] if len(operand_range) > 0 else None

  @builtins.property
  def slice_lengths(self) -> _ods_ir.DenseI64ArrayAttr:
    return self.operation.attributes["slice_lengths"]

  @slice_lengths.setter
  def slice_lengths(self, value: _ods_ir.DenseI64ArrayAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["slice_lengths"] = value

  @builtins.property
  def commit_group(self) -> _Optional[_ods_ir.BoolAttr]:
    if "commit_group" not in self.operation.attributes:
      return None
    return self.operation.attributes["commit_group"]

  @commit_group.setter
  def commit_group(self, value: _Optional[_ods_ir.BoolAttr]):
    if value is not None:
      self.operation.attributes["commit_group"] = value
    elif "commit_group" in self.operation.attributes:
      del self.operation.attributes["commit_group"]

  @commit_group.deleter
  def commit_group(self):
    del self.operation.attributes["commit_group"]

  @builtins.property
  def reduction_op(self) -> _Optional[_ods_ir.Attribute]:
    if "reduction_op" not in self.operation.attributes:
      return None
    return self.operation.attributes["reduction_op"]

  @reduction_op.setter
  def reduction_op(self, value: _Optional[_ods_ir.Attribute]):
    if value is not None:
      self.operation.attributes["reduction_op"] = value
    elif "reduction_op" in self.operation.attributes:
      del self.operation.attributes["reduction_op"]

  @reduction_op.deleter
  def reduction_op(self):
    del self.operation.attributes["reduction_op"]

def async_store(source, destination, indices, slice_lengths, *, predicate=None, commit_group=None, reduction_op=None, loc=None, ip=None) -> AsyncStoreOp:
  return AsyncStoreOp(source=source, destination=destination, indices=indices, slice_lengths=slice_lengths, predicate=predicate, commit_group=commit_group, reduction_op=reduction_op, loc=loc, ip=ip)
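
# A hedged usage sketch (not part of the generated bindings): storing an SMEM
# tile back to GMEM with the `async_store` builder above. All values are
# hypothetical; the optional `reduction_op` is commented out because its
# accepted values come from the MosaicGPU_TMAReduction attribute, not from
# this file.
#
#   async_store(
#       source=tile_smem,          # contiguous SMEM memref
#       destination=dst_gmem,      # GMEM memref
#       indices=[i0, i1],          # starting indices in the destination
#       slice_lengths=[128, 64],   # -1 expands a collapsed source dim to 1
#       # reduction_op=...,        # e.g. an "add" reduction into GMEM
#   )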

@_ods_cext.register_operation(_Dialect)
class AsyncStoreTmemOp(_ods_ir.OpView):
  OPERATION_NAME = "mosaic_gpu.async_store_tmem"

  _ODS_REGIONS = (0, True)

  def __init__(self, source, destination, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(source)
    operands.append(destination)
    _ods_context = _ods_get_default_loc_context(loc)
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def source(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    return self.operation.operands[0]

  @builtins.property
  def destination(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[1]

def async_store_tmem(source, destination, *, loc=None, ip=None) -> AsyncStoreTmemOp:
  return AsyncStoreTmemOp(source=source, destination=destination, loc=loc, ip=ip)

@_ods_cext.register_operation(_Dialect)
class BroadcastInDimOp(_ods_ir.OpView):
  r"""
  `broadcast_dimensions` must have the same size as the rank of the input
  vector and, for each input dimension, specifies which output dimension it
  corresponds to.
  """

  OPERATION_NAME = "mosaic_gpu.broadcast_in_dim"

  _ODS_REGIONS = (0, True)

  def __init__(self, result, operand, broadcast_dimensions, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(operand)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["broadcast_dimensions"] = (broadcast_dimensions if (
    isinstance(broadcast_dimensions, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('DenseI64ArrayAttr')) else
      _ods_ir.AttrBuilder.get('DenseI64ArrayAttr')(broadcast_dimensions, context=_ods_context))
    results = []
    results.append(result)
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def operand(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    return self.operation.operands[0]

  @builtins.property
  def broadcast_dimensions(self) -> _ods_ir.DenseI64ArrayAttr:
    return self.operation.attributes["broadcast_dimensions"]

  @broadcast_dimensions.setter
  def broadcast_dimensions(self, value: _ods_ir.DenseI64ArrayAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["broadcast_dimensions"] = value

def broadcast_in_dim(result, operand, broadcast_dimensions, *, loc=None, ip=None) -> _ods_ir.OpResult:
  return BroadcastInDimOp(result=result, operand=operand, broadcast_dimensions=broadcast_dimensions, loc=loc, ip=ip).result
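
# A hedged example (not part of the generated bindings): broadcasting a
# vector<16xf32> into a vector<8x16xf32> would use broadcast_dimensions=[1],
# i.e. the single input dimension maps to output dimension 1. `vec_8x16_f32`
# (a result type) and `row_vec` (an SSA value) are hypothetical:
#
#   broadcast_in_dim(result=vec_8x16_f32, operand=row_vec,
#                    broadcast_dimensions=[1])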

@_ods_cext.register_operation(_Dialect)
class BroadcastedIotaOp(_ods_ir.OpView):
  r"""
  Creates an array that has the specified shape and holds values starting at
  zero and incrementing by one along the specified dimension.
  """

  OPERATION_NAME = "mosaic_gpu.broadcasted_iota"

  _ODS_REGIONS = (0, True)

  def __init__(self, result, dimension, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["dimension"] = (dimension if (
    isinstance(dimension, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('I32Attr')) else
      _ods_ir.AttrBuilder.get('I32Attr')(dimension, context=_ods_context))
    results = []
    results.append(result)
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def dimension(self) -> _ods_ir.IntegerAttr:
    return self.operation.attributes["dimension"]

  @dimension.setter
  def dimension(self, value: _ods_ir.IntegerAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["dimension"] = value

def broadcasted_iota(result, dimension, *, loc=None, ip=None) -> _ods_ir.OpResult:
  return BroadcastedIotaOp(result=result, dimension=dimension, loc=loc, ip=ip).result

@_ods_cext.register_operation(_Dialect)
class CustomPrimitiveOp(_ods_ir.OpView):
  r"""
  Allows defining a custom Mosaic GPU primitive.
  
  Custom primitives should carry input and output layouts for each of their
  vector operands and outputs, and input transforms for each of their memref
  operands that live in SMEM.
  
  Custom primitives can only return vectors.
  """

  OPERATION_NAME = "mosaic_gpu.custom_primitive"

  _ODS_REGIONS = (1, True)

  def __init__(self, result, operands_, in_layouts, in_transforms, out_layouts, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.extend(_get_op_results_or_values(operands_))
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["in_layouts"] = (in_layouts if (
    isinstance(in_layouts, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('ArrayAttr')) else
      _ods_ir.AttrBuilder.get('ArrayAttr')(in_layouts, context=_ods_context))
    attributes["in_transforms"] = (in_transforms if (
    isinstance(in_transforms, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('ArrayAttr')) else
      _ods_ir.AttrBuilder.get('ArrayAttr')(in_transforms, context=_ods_context))
    attributes["out_layouts"] = (out_layouts if (
    isinstance(out_layouts, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('ArrayAttr')) else
      _ods_ir.AttrBuilder.get('ArrayAttr')(out_layouts, context=_ods_context))
    results = []
    results.extend(result)
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def operands_(self) -> _ods_ir.OpOperandList:
    _ods_variadic_group_length = len(self.operation.operands) - 1 + 1
    return self.operation.operands[0:0 + _ods_variadic_group_length]

  @builtins.property
  def in_layouts(self) -> _ods_ir.ArrayAttr:
    return self.operation.attributes["in_layouts"]

  @in_layouts.setter
  def in_layouts(self, value: _ods_ir.ArrayAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["in_layouts"] = value

  @builtins.property
  def in_transforms(self) -> _ods_ir.ArrayAttr:
    return self.operation.attributes["in_transforms"]

  @in_transforms.setter
  def in_transforms(self, value: _ods_ir.ArrayAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["in_transforms"] = value

  @builtins.property
  def out_layouts(self) -> _ods_ir.ArrayAttr:
    return self.operation.attributes["out_layouts"]

  @out_layouts.setter
  def out_layouts(self, value: _ods_ir.ArrayAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["out_layouts"] = value

  @builtins.property
  def body(self) -> _ods_ir.Region:
    return self.regions[0]

def custom_primitive(result, operands_, in_layouts, in_transforms, out_layouts, *, loc=None, ip=None) -> _Union[_ods_ir.OpResult, _ods_ir.OpResultList, CustomPrimitiveOp]:
  op = CustomPrimitiveOp(result=result, operands_=operands_, in_layouts=in_layouts, in_transforms=in_transforms, out_layouts=out_layouts, loc=loc, ip=ip); results = op.results
  return results if len(results) > 1 else (results[0] if len(results) == 1 else op)

@_ods_cext.register_operation(_Dialect)
class DebugPrintOp(_ods_ir.OpView):
  OPERATION_NAME = "mosaic_gpu.debug_print"

  _ODS_REGIONS = (0, True)

  def __init__(self, format, value, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(value)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["format"] = (format if (
    isinstance(format, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('StrAttr')) else
      _ods_ir.AttrBuilder.get('StrAttr')(format, context=_ods_context))
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def value(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    return self.operation.operands[0]

  @builtins.property
  def format(self) -> _ods_ir.StringAttr:
    return self.operation.attributes["format"]

  @format.setter
  def format(self, value: _ods_ir.StringAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["format"] = value

def debug_print(format, value, *, loc=None, ip=None) -> DebugPrintOp:
  return DebugPrintOp(format=format, value=value, loc=loc, ip=ip)

@_ods_cext.register_operation(_Dialect)
class InitializeBarrierOp(_ods_ir.OpView):
  r"""
  Initializes `num_barriers` barriers each meant to synchronize exactly
  `arrival_count` threads.
  
  `base_pointer` must be a pointer to a shared memory location.
  """

  OPERATION_NAME = "mosaic_gpu.initialize_barrier"

  _ODS_REGIONS = (0, True)

  def __init__(self, base_pointer, arrival_count, num_barriers, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(base_pointer)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["arrival_count"] = (arrival_count if (
    isinstance(arrival_count, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('I64Attr')) else
      _ods_ir.AttrBuilder.get('I64Attr')(arrival_count, context=_ods_context))
    attributes["num_barriers"] = (num_barriers if (
    isinstance(num_barriers, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('I32Attr')) else
      _ods_ir.AttrBuilder.get('I32Attr')(num_barriers, context=_ods_context))
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def base_pointer(self) -> _ods_ir.Value:
    return self.operation.operands[0]

  @builtins.property
  def arrival_count(self) -> _ods_ir.IntegerAttr:
    return self.operation.attributes["arrival_count"]

  @arrival_count.setter
  def arrival_count(self, value: _ods_ir.IntegerAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["arrival_count"] = value

  @builtins.property
  def num_barriers(self) -> _ods_ir.IntegerAttr:
    return self.operation.attributes["num_barriers"]

  @num_barriers.setter
  def num_barriers(self, value: _ods_ir.IntegerAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["num_barriers"] = value

def initialize_barrier(base_pointer, arrival_count, num_barriers, *, loc=None, ip=None) -> InitializeBarrierOp:
  return InitializeBarrierOp(base_pointer=base_pointer, arrival_count=arrival_count, num_barriers=num_barriers, loc=loc, ip=ip)
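
# A hedged lifecycle sketch (not part of the generated bindings): allocating
# barriers and posting an expected-transaction count before an async copy.
# `smem_base_ptr` and `barrier_ref` are hypothetical values; the byte count is
# simply the size of the transfer the barrier is expected to observe.
#
#   initialize_barrier(base_pointer=smem_base_ptr, arrival_count=1,
#                      num_barriers=2)
#   arrive_expect_tx(barrier=barrier_ref,
#                    expect_tx=128 * 64 * 2)  # bytes of the pending copy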

@_ods_cext.register_operation(_Dialect)
class LayoutCastOp(_ods_ir.OpView):
  r"""
  Casts a vector value to a new strided or tiled layout.
  """

  OPERATION_NAME = "mosaic_gpu.layout_cast"

  _ODS_REGIONS = (0, True)

  def __init__(self, x, new_layout, *, results=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(x)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["new_layout"] = (new_layout if (
    isinstance(new_layout, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('anonymous_821')) else
      _ods_ir.AttrBuilder.get('anonymous_821')(new_layout, context=_ods_context))
    if results is None: results = [operands[0].type] * 1
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def x(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    return self.operation.operands[0]

  @builtins.property
  def new_layout(self) -> _ods_ir.Attribute:
    return self.operation.attributes["new_layout"]

  @new_layout.setter
  def new_layout(self, value: _ods_ir.Attribute):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["new_layout"] = value

def layout_cast(x, new_layout, *, results=None, loc=None, ip=None) -> _ods_ir.OpResult:
  return LayoutCastOp(x=x, new_layout=new_layout, results=results, loc=loc, ip=ip).result

@_ods_cext.register_operation(_Dialect)
class OptimizationBarrierOp(_ods_ir.OpView):
  OPERATION_NAME = "mosaic_gpu.optimization_barrier"

  _ODS_REGIONS = (0, True)

  def __init__(self, operands_, *, results=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.extend(_get_op_results_or_values(operands_))
    _ods_context = _ods_get_default_loc_context(loc)
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def operands_(self) -> _ods_ir.OpOperandList:
    _ods_variadic_group_length = len(self.operation.operands) - 1 + 1
    return self.operation.operands[0:0 + _ods_variadic_group_length]

def optimization_barrier(operands_, *, results=None, loc=None, ip=None) -> _Union[_ods_ir.OpResult, _ods_ir.OpResultList, OptimizationBarrierOp]:
  op = OptimizationBarrierOp(operands_=operands_, results=results, loc=loc, ip=ip); results = op.results
  return results if len(results) > 1 else (results[0] if len(results) == 1 else op)

@_ods_cext.register_operation(_Dialect)
class PrintLayoutOp(_ods_ir.OpView):
  OPERATION_NAME = "mosaic_gpu.print_layout"

  _ODS_REGIONS = (0, True)

  def __init__(self, format, value, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(value)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["format"] = (format if (
    isinstance(format, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('StrAttr')) else
      _ods_ir.AttrBuilder.get('StrAttr')(format, context=_ods_context))
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def value(self) -> _ods_ir.Value:
    return self.operation.operands[0]

  @builtins.property
  def format(self) -> _ods_ir.StringAttr:
    return self.operation.attributes["format"]

  @format.setter
  def format(self, value: _ods_ir.StringAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["format"] = value

def print_layout(format, value, *, loc=None, ip=None) -> PrintLayoutOp:
  return PrintLayoutOp(format=format, value=value, loc=loc, ip=ip)

@_ods_cext.register_operation(_Dialect)
class ReturnOp(_ods_ir.OpView):
  r"""
  The `return` op is a terminator that indicates the end of execution
  within a `CustomPrimitiveOp`'s region. It can optionally return some values,
  which become the results of the parent `CustomPrimitiveOp`.
  
  The declared results of the parent `CustomPrimitiveOp` must match the
  operand types of this op.
  """

  OPERATION_NAME = "mosaic_gpu.return"

  _ODS_REGIONS = (0, True)

  def __init__(self, operands_, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.extend(_get_op_results_or_values(operands_))
    _ods_context = _ods_get_default_loc_context(loc)
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def operands_(self) -> _ods_ir.OpOperandList:
    _ods_variadic_group_length = len(self.operation.operands) - 1 + 1
    return self.operation.operands[0:0 + _ods_variadic_group_length]

def return_(operands_, *, loc=None, ip=None) -> ReturnOp:
  return ReturnOp(operands_=operands_, loc=loc, ip=ip)

@_ods_cext.register_operation(_Dialect)
class SliceSMEMOp(_ods_ir.OpView):
  OPERATION_NAME = "mosaic_gpu.slice_smem"

  _ODS_REGIONS = (0, True)

  def __init__(self, result, offset, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(offset)
    _ods_context = _ods_get_default_loc_context(loc)
    results = []
    results.append(result)
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def offset(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    return self.operation.operands[0]

def slice_smem(result, offset, *, loc=None, ip=None) -> _ods_ir.OpResult:
  return SliceSMEMOp(result=result, offset=offset, loc=loc, ip=ip).result

@_ods_cext.register_operation(_Dialect)
class SliceTmemOp(_ods_ir.OpView):
  r"""
  The principal use case for this op is to do a single TMEM allocation and
  slice it into multiple smaller TMEM references. `source` is the large TMEM
  allocation and `offset` is the number of columns to start slicing from.
  """

  OPERATION_NAME = "mosaic_gpu.slice_tmem"

  _ODS_REGIONS = (0, True)

  def __init__(self, result, source, offset, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(source)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["offset"] = (offset if (
    isinstance(offset, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('I32Attr')) else
      _ods_ir.AttrBuilder.get('I32Attr')(offset, context=_ods_context))
    results = []
    results.append(result)
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def source(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[0]

  @builtins.property
  def offset(self) -> _ods_ir.IntegerAttr:
    return self.operation.attributes["offset"]

  @offset.setter
  def offset(self, value: _ods_ir.IntegerAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["offset"] = value

def slice_tmem(result, source, offset, *, loc=None, ip=None) -> _ods_ir.OpResult:
  return SliceTmemOp(result=result, source=source, offset=offset, loc=loc, ip=ip).result

@_ods_cext.register_operation(_Dialect)
class TcGen05MMAOp(_ods_ir.OpView):
  r"""
  Schedules `tcgen05.mma` instructions that perform the following matrix
  multiply and accumulate:
  
    accumulator += a * b
  
  This operation supports larger inputs than the PTX-level MMA instruction
  and will schedule as many PTX-level MMA instructions as needed to
  accomplish the calculation.
  
  The inputs should have the following shapes:
    - a: [groups_m * m, groups_k * s]
    - b: [groups_k * s, groups_n * s]
    - accumulator: [groups_m * m, groups_n * s]
  where `s == swizzle / element_bytewidth` and `m` is specified according to
  https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-matrix-shape.
  
  The `accumulator`, `a` and `b` matrices need to be provided as 2-dimensional
  memrefs. The `accumulator` is always in TMEM and `b` is always in SMEM.
  `a` can be in TMEM or SMEM. `a` and `b` must have the same element
  type and when `a` is in TMEM only F16 or BF16 are supported.
  
  `a_scale` and `b_scale` are optional scaling matrices that reside in TMEM.
  When set, the operation is defined as:
  
    accumulator += (a * a_scale) * (b * b_scale)
  
  `accumulate` is a boolean that indicates whether to perform the accumulate
  step.
  """

  OPERATION_NAME = "mosaic_gpu.tcgen05_mma"

  _ODS_OPERAND_SEGMENTS = [1,1,1,1,0,0,]

  _ODS_REGIONS = (0, True)

  def __init__(self, accumulator, a, b, accumulate, *, a_scale=None, b_scale=None, collective=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(accumulator)
    operands.append(a)
    operands.append(b)
    operands.append(accumulate)
    operands.append(a_scale)
    operands.append(b_scale)
    _ods_context = _ods_get_default_loc_context(loc)
    if collective is not None: attributes["collective"] = (collective if (
        isinstance(collective, _ods_ir.Attribute) or
        not _ods_ir.AttrBuilder.contains('BoolAttr')) else
          _ods_ir.AttrBuilder.get('BoolAttr')(collective, context=_ods_context))
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def accumulator(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 0)
    return operand_range[0]

  @builtins.property
  def a(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 1)
    return operand_range[0]

  @builtins.property
  def b(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 2)
    return operand_range[0]

  @builtins.property
  def accumulate(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 3)
    return operand_range[0]

  @builtins.property
  def a_scale(self) -> _Optional[_ods_ir.Value[_ods_ir.MemRefType]]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 4)
    return operand_range[0] if len(operand_range) > 0 else None

  @builtins.property
  def b_scale(self) -> _Optional[_ods_ir.Value[_ods_ir.MemRefType]]:
    operand_range = _ods_segmented_accessor(
         self.operation.operands,
         self.operation.attributes["operandSegmentSizes"], 5)
    return operand_range[0] if len(operand_range) > 0 else None

  @builtins.property
  def collective(self) -> _ods_ir.BoolAttr:
    return self.operation.attributes["collective"]

  @collective.setter
  def collective(self, value: _ods_ir.BoolAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["collective"] = value

def tcgen05_mma(accumulator, a, b, accumulate, *, a_scale=None, b_scale=None, collective=None, loc=None, ip=None) -> TcGen05MMAOp:
  return TcGen05MMAOp(accumulator=accumulator, a=a, b=b, accumulate=accumulate, a_scale=a_scale, b_scale=b_scale, collective=collective, loc=loc, ip=ip)
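
# A hedged shape example (not part of the generated bindings) for the formulas
# in the docstring above: with a 128-byte swizzle and bf16 inputs (2 bytes per
# element), s = 128 / 2 = 64. Taking m = 128, groups_m = groups_n = 2 and
# groups_k = 4 gives the illustrative shapes
#   a:           [256, 256]   # [groups_m * m, groups_k * s]
#   b:           [256, 128]   # [groups_k * s, groups_n * s]
#   accumulator: [256, 128]   # [groups_m * m, groups_n * s]
# The builder call itself, with all SSA values hypothetical, would be:
#   tcgen05_mma(accumulator=acc_tmem, a=a_smem, b=b_smem,
#               accumulate=accumulate_flag)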

@_ods_cext.register_operation(_Dialect)
class TmemAllocOp(_ods_ir.OpView):
  r"""
  This op allocates a chunk of TMEM and stores the pointer to the memory
  in the provided SMEM memref.
  
  The `smem_ptr` is a pointer in SMEM where a pointer to the allocated
  TMEM will be stored. The op returns a memref to the allocated TMEM. The
  result must have a shape with dimensions [rows, logical_columns]. If
  `packing` is 1, then the number of logical (unpacked) columns is equal to
  the number of allocated columns in TMEM. Otherwise, these constraints
  must hold:
  
      packing = 32 / bitwidth(element type of result)
      unpacked_columns = allocated_columns * packing
  
  The number of allocated columns in TMEM can be any power of two in the
  range [32, 512]. If the calculated number of allocated columns is less than
  32 or not a power of two, then it will be rounded up to the nearest power of
  two greater than or equal to 32.
  
  If `collective` is `true`, 2 CTAs will perform the allocation collectively;
  otherwise, only one CTA will perform the allocation.
  """

  OPERATION_NAME = "mosaic_gpu.tmem_alloc"

  _ODS_REGIONS = (0, True)

  def __init__(self, result, smem_ptr, *, collective=None, packing=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(smem_ptr)
    _ods_context = _ods_get_default_loc_context(loc)
    if collective is not None: attributes["collective"] = (collective if (
        isinstance(collective, _ods_ir.Attribute) or
        not _ods_ir.AttrBuilder.contains('BoolAttr')) else
          _ods_ir.AttrBuilder.get('BoolAttr')(collective, context=_ods_context))
    if packing is not None: attributes["packing"] = (packing if (
        isinstance(packing, _ods_ir.Attribute) or
        not _ods_ir.AttrBuilder.contains('I32Attr')) else
          _ods_ir.AttrBuilder.get('I32Attr')(packing, context=_ods_context))
    results = []
    results.append(result)
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def smem_ptr(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[0]

  @builtins.property
  def collective(self) -> _ods_ir.BoolAttr:
    return self.operation.attributes["collective"]

  @collective.setter
  def collective(self, value: _ods_ir.BoolAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["collective"] = value

  @builtins.property
  def packing(self) -> _ods_ir.IntegerAttr:
    return self.operation.attributes["packing"]

  @packing.setter
  def packing(self, value: _ods_ir.IntegerAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["packing"] = value

def tmem_alloc(result, smem_ptr, *, collective=None, packing=None, loc=None, ip=None) -> _ods_ir.OpResult:
  return TmemAllocOp(result=result, smem_ptr=smem_ptr, collective=collective, packing=packing, loc=loc, ip=ip).result
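
# A hedged worked example (not part of the generated bindings) of the packing
# constraint in the docstring above: for an f16 result (16-bit elements),
# packing = 32 / 16 = 2, so a result with 128 logical columns corresponds to
# 128 / 2 = 64 allocated TMEM columns, which is already a power of two >= 32
# and needs no rounding. `tmem_memref_type` and `smem_ptr` are hypothetical:
#
#   tmem_ref = tmem_alloc(result=tmem_memref_type, smem_ptr=smem_ptr,
#                         packing=2)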

@_ods_cext.register_operation(_Dialect)
class TmemDeallocOp(_ods_ir.OpView):
  OPERATION_NAME = "mosaic_gpu.tmem_dealloc"

  _ODS_REGIONS = (0, True)

  def __init__(self, tmem_ref, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(tmem_ref)
    _ods_context = _ods_get_default_loc_context(loc)
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def tmem_ref(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[0]

def tmem_dealloc(tmem_ref, *, loc=None, ip=None) -> TmemDeallocOp:
  return TmemDeallocOp(tmem_ref=tmem_ref, loc=loc, ip=ip)

@_ods_cext.register_operation(_Dialect)
class TmemLayoutCastOp(_ods_ir.OpView):
  OPERATION_NAME = "mosaic_gpu.tmem_layout_cast"

  _ODS_REGIONS = (0, True)

  def __init__(self, ref, new_layout, *, results=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(ref)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["new_layout"] = (new_layout if (
    isinstance(new_layout, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('MosaicGPU_TiledLayout')) else
      _ods_ir.AttrBuilder.get('MosaicGPU_TiledLayout')(new_layout, context=_ods_context))
    if results is None: results = [operands[0].type] * 1
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def ref(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[0]

  @builtins.property
  def new_layout(self) -> _ods_ir.Attribute:
    return self.operation.attributes["new_layout"]

  @new_layout.setter
  def new_layout(self, value: _ods_ir.Attribute):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["new_layout"] = value

def tmem_layout_cast(ref, new_layout, *, results=None, loc=None, ip=None) -> _ods_ir.OpResult:
  return TmemLayoutCastOp(ref=ref, new_layout=new_layout, results=results, loc=loc, ip=ip).result

@_ods_cext.register_operation(_Dialect)
class TmemRelinquishAllocPermitOp(_ods_ir.OpView):
  r"""
  The instruction specifies that the CTA of the executing thread is
  relinquishing the right to allocate Tensor Memory. So, it is illegal for a
  CTA to perform `tmem_alloc` after any of its constituent threads execute
  `tmem_relinquish_alloc_permit`.
  
  If `collective` is `true`, applies to collective TMEM allocations.
  """

  OPERATION_NAME = "mosaic_gpu.tmem_relinquish_alloc_permit"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, collective=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    _ods_context = _ods_get_default_loc_context(loc)
    if collective is not None: attributes["collective"] = (collective if (
        isinstance(collective, _ods_ir.Attribute) or
        not _ods_ir.AttrBuilder.contains('BoolAttr')) else
          _ods_ir.AttrBuilder.get('BoolAttr')(collective, context=_ods_context))
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def collective(self) -> _ods_ir.BoolAttr:
    return self.operation.attributes["collective"]

  @collective.setter
  def collective(self, value: _ods_ir.BoolAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["collective"] = value

def tmem_relinquish_alloc_permit(*, collective=None, loc=None, ip=None) -> TmemRelinquishAllocPermitOp:
  return TmemRelinquishAllocPermitOp(collective=collective, loc=loc, ip=ip)

@_ods_cext.register_operation(_Dialect)
class VectorLoadOp(_ods_ir.OpView):
  r"""
  Similar to `vector.load` (vector dialect) but supports loading from
  non-contiguous memory.
  
  If `optimized` is true, an error is raised if an optimized transfer cannot
  be generated. If unset, the op falls back to a non-optimized transfer when
  an optimized transfer cannot be generated.
  """

  OPERATION_NAME = "mosaic_gpu.vector_load"

  _ODS_REGIONS = (0, True)

  def __init__(self, source, *, optimized=None, results=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(source)
    _ods_context = _ods_get_default_loc_context(loc)
    if optimized is not None: attributes["optimized"] = (optimized if (
        isinstance(optimized, _ods_ir.Attribute) or
        not _ods_ir.AttrBuilder.contains('BoolAttr')) else
          _ods_ir.AttrBuilder.get('BoolAttr')(optimized, context=_ods_context))
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def source(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[0]

  @builtins.property
  def optimized(self) -> _Optional[_ods_ir.BoolAttr]:
    if "optimized" not in self.operation.attributes:
      return None
    return self.operation.attributes["optimized"]

  @optimized.setter
  def optimized(self, value: _Optional[_ods_ir.BoolAttr]):
    if value is not None:
      self.operation.attributes["optimized"] = value
    elif "optimized" in self.operation.attributes:
      del self.operation.attributes["optimized"]

  @optimized.deleter
  def optimized(self):
    del self.operation.attributes["optimized"]

def vector_load(source, *, optimized=None, results=None, loc=None, ip=None) -> _ods_ir.OpResult:
  return VectorLoadOp(source=source, optimized=optimized, results=results, loc=loc, ip=ip).result

@_ods_cext.register_operation(_Dialect)
class VectorStoreOp(_ods_ir.OpView):
  r"""
  Similar to `vector.store` (vector dialect) but supports storing to
  non-contiguous memory.
  
  If `optimized` is true, an error is raised if an optimized transfer cannot
  be generated. If unset, the op falls back to a non-optimized transfer when
  an optimized transfer cannot be generated.
  """

  OPERATION_NAME = "mosaic_gpu.vector_store"

  _ODS_REGIONS = (0, True)

  def __init__(self, valueToStore, destination, *, optimized=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(valueToStore)
    operands.append(destination)
    _ods_context = _ods_get_default_loc_context(loc)
    if optimized is not None: attributes["optimized"] = (optimized if (
        isinstance(optimized, _ods_ir.Attribute) or
        not _ods_ir.AttrBuilder.contains('BoolAttr')) else
          _ods_ir.AttrBuilder.get('BoolAttr')(optimized, context=_ods_context))
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def valueToStore(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    return self.operation.operands[0]

  @builtins.property
  def destination(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[1]

  @builtins.property
  def optimized(self) -> _Optional[_ods_ir.BoolAttr]:
    if "optimized" not in self.operation.attributes:
      return None
    return self.operation.attributes["optimized"]

  @optimized.setter
  def optimized(self, value: _Optional[_ods_ir.BoolAttr]):
    if value is not None:
      self.operation.attributes["optimized"] = value
    elif "optimized" in self.operation.attributes:
      del self.operation.attributes["optimized"]

  @optimized.deleter
  def optimized(self):
    del self.operation.attributes["optimized"]

def vector_store(value_to_store, destination, *, optimized=None, loc=None, ip=None) -> VectorStoreOp:
  return VectorStoreOp(valueToStore=value_to_store, destination=destination, optimized=optimized, loc=loc, ip=ip)

@_ods_cext.register_operation(_Dialect)
class WGMMAOp(_ods_ir.OpView):
  r"""
  Schedules WGMMA operations that perform the following matrix multiply and
  accumulate:
  
    accumulator = a * b + accumulator
  
  This operation supports larger inputs than the PTX-level WGMMA operation
  and will schedule as many PTX-level WGMMA operations as needed to
  accomplish the calculation. The `b` matrix, and optionally `a`, need to be
  provided as 2-dimensional memrefs.
  
  The inputs should have the following shapes:
    - a: [groups_m * 64, groups_k * s]
    - b: [groups_k * s, groups_n * s]
    - accumulator: [groups_m * 64, groups_n * s]
  where `s == swizzle / element_bytewidth`.
  
  The output has the same shape and type as the input accumulator.
  
  The `accumulator` is always in registers and `b` is always in shared memory.
  `a` and `b` must have the same element type and when `a` is in
  registers only F16 or BF16 are supported.
  
  The `accumulator` must be a vector with a FragmentedLayout. The WGMMA
  operation will be executed in the async proxy and any inputs in
  registers need to be synchronized with a memory fence.
  
  Usually `a` is read from shared memory if it is used directly in the WGMMA
  operation. If `a` needs to be transformed before it is used in the WGMMA
  operation, it may be more convenient to read it directly from registers.
  This avoids the need to store the data and wait for a fence.
  """

  OPERATION_NAME = "mosaic_gpu.wgmma"

  _ODS_REGIONS = (0, True)

  def __init__(self, accumulator, a, b, *, results=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(accumulator)
    operands.append(a)
    operands.append(b)
    _ods_context = _ods_get_default_loc_context(loc)
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def accumulator(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    return self.operation.operands[0]

  @builtins.property
  def a(self) -> _ods_ir.Value:
    return self.operation.operands[1]

  @builtins.property
  def b(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[2]

def wgmma(accumulator, a, b, *, results=None, loc=None, ip=None) -> _ods_ir.OpResult:
  return WGMMAOp(accumulator=accumulator, a=a, b=b, results=results, loc=loc, ip=ip).result
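
# A hedged shape example (not part of the generated bindings) for the WGMMA
# shape rules in the docstring above: with a 128-byte swizzle and f16 operands
# (2 bytes per element), s = 128 / 2 = 64. Taking groups_m = 2, groups_n = 1
# and groups_k = 4 gives the illustrative shapes
#   a:           [128, 256]   # [groups_m * 64, groups_k * s]
#   b:           [256, 64]    # [groups_k * s, groups_n * s]
#   accumulator: [128, 64]    # [groups_m * 64, groups_n * s]
# and the value returned by `wgmma` has the same shape and type as the
# accumulator.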

@_ods_cext.register_operation(_Dialect)
class WaitOp(_ods_ir.OpView):
  r"""
  All threads in the warpgroup will block, waiting on the provided barrier
  until:
    - all pending threads have arrived on the barrier
    - all expected byte transfers have been completed
    - the barrier's parity matches the provided parity
  """

  OPERATION_NAME = "mosaic_gpu.wait"

  _ODS_REGIONS = (0, True)

  def __init__(self, barrier, parity, *, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(barrier)
    operands.append(parity)
    _ods_context = _ods_get_default_loc_context(loc)
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def barrier(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[0]

  @builtins.property
  def parity(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    return self.operation.operands[1]

def wait(barrier, parity, *, loc=None, ip=None) -> WaitOp:
  return WaitOp(barrier=barrier, parity=parity, loc=loc, ip=ip)
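
# A hedged usage sketch (not part of the generated bindings): waiting on a
# barrier from the consumer side. `barrier_ref` and `parity` are hypothetical
# SSA values; `parity` is typically an i1 that the surrounding loop flips each
# time the same barrier slot is reused.
#
#   wait(barrier=barrier_ref, parity=parity)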

@_ods_cext.register_operation(_Dialect)
class WithTransformsOp(_ods_ir.OpView):
  r"""
  This op enforces the provided transforms on the parameter memref.
  """

  OPERATION_NAME = "mosaic_gpu.with_transforms"

  _ODS_REGIONS = (0, True)

  def __init__(self, ref, transforms, *, results=None, loc=None, ip=None):
    operands = []
    attributes = {}
    regions = None
    operands.append(ref)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes["transforms"] = (transforms if (
    isinstance(transforms, _ods_ir.Attribute) or
    not _ods_ir.AttrBuilder.contains('ArrayAttr')) else
      _ods_ir.AttrBuilder.get('ArrayAttr')(transforms, context=_ods_context))
    if results is None: results = [operands[0].type] * 1
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def ref(self) -> _ods_ir.Value[_ods_ir.MemRefType]:
    return self.operation.operands[0]

  @builtins.property
  def transforms(self) -> _ods_ir.ArrayAttr:
    return self.operation.attributes["transforms"]

  @transforms.setter
  def transforms(self, value: _ods_ir.ArrayAttr):
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["transforms"] = value

def with_transforms(ref, transforms, *, results=None, loc=None, ip=None) -> _ods_ir.OpResult:
  return WithTransformsOp(ref=ref, transforms=transforms, results=results, loc=loc, ip=ip).result
