# Autogenerated by mlir-tblgen; don't manually edit.

from ._ods_common import _cext as _ods_cext
from ._ods_common import (
    equally_sized_accessor as _ods_equally_sized_accessor,
    get_default_loc_context as _ods_get_default_loc_context,
    get_op_results_or_values as _get_op_results_or_values,
    segmented_accessor as _ods_segmented_accessor,
)
_ods_ir = _ods_cext.ir
_ods_cext.globals.register_traceback_file_exclusion(__file__)

import builtins
from typing import Sequence as _Sequence, Union as _Union, Optional as _Optional


@_ods_cext.register_dialect
class _Dialect(_ods_ir.Dialect):
  # Namespace string of this dialect; the operation names declared in the
  # op classes below all start with "nvvm.".
  DIALECT_NAMESPACE = "nvvm"

@_ods_cext.register_operation(_Dialect)
class Barrier0Op(_ods_ir.OpView):
  r"""
  Convenience form of the CTA (Cooperative Thread Array) barrier.

  `nvvm.barrier0` performs barrier synchronization and communication within a
  CTA using barrier ID 0. It is functionally equivalent to `nvvm.barrier` or
  `nvvm.barrier id=0`.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
  """

  OPERATION_NAME = "nvvm.barrier0"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, loc=None, ip=None):
    """Build the op with no operands, no attributes and no results.

    `loc` and `ip` are forwarded unchanged to the `OpView` constructor.
    """
    # The context is looked up as in the other generated builders, although
    # this op has no attribute that consumes it.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

def barrier0(*, loc=None, ip=None) -> Barrier0Op:
  """Create a `Barrier0Op`, forwarding `loc` and `ip`, and return the op."""
  op = Barrier0Op(loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class BarrierArriveOp(_ods_ir.OpView):
  r"""
  A thread that executes this op announces its arrival at the barrier with
  the given id and then continues its execution.

  When `barrierId` is not present, the default barrier id 0 is used, which is
  similar to the `nvvm.barrier` op.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
  """

  OPERATION_NAME = "nvvm.barrier.arrive"

  _ODS_REGIONS = (0, True)

  def __init__(self, numberOfThreads, *, barrierId=None, loc=None, ip=None):
    """Build the op from `numberOfThreads` and an optional `barrierId`.

    When `barrierId` is given it becomes the first operand and
    `numberOfThreads` the second; otherwise `numberOfThreads` is the only
    operand. `loc` and `ip` are forwarded to the `OpView` constructor.
    """
    if barrierId is None:
      operands = [numberOfThreads]
    else:
      operands = [barrierId, numberOfThreads]
    # Looked up as in the other generated builders; no attribute of this op
    # consumes the context.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def barrierId(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """The optional `barrierId` operand; None when fewer than two operands exist."""
    all_operands = self.operation.operands
    if len(all_operands) < 2:
      return None
    return all_operands[0]

  @builtins.property
  def numberOfThreads(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """The `numberOfThreads` operand, located after the optional `barrierId`."""
    all_operands = self.operation.operands
    # Size of the optional group: operand count minus the two declared
    # operands, plus the one optional slot.
    optional_group_size = len(all_operands) - 2 + 1
    position = 1 + optional_group_size - 1
    return all_operands[position]

def barrier_arrive(number_of_threads, *, barrier_id=None, loc=None, ip=None) -> BarrierArriveOp:
  """Create a `BarrierArriveOp` from snake_case arguments and return the op."""
  op = BarrierArriveOp(
      numberOfThreads=number_of_threads,
      barrierId=barrier_id,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class BarrierOp(_ods_ir.OpView):
  r"""
  The `nvvm.barrier` operation performs barrier synchronization and communication
  within a CTA (Cooperative Thread Array). It causes executing threads to wait for
  all non-exited threads participating in the barrier to arrive.

  The operation accepts the following optional operands and attribute:

  - `barrierId` (operand): Specifies a logical barrier resource with value 0
    through 15. Each CTA instance has sixteen barriers numbered 0..15.
    Defaults to 0 if not specified.
  - `numberOfThreads` (operand): Specifies the number of threads participating
    in the barrier. When specified, the value must be a multiple of the warp
    size. If not specified, all threads in the CTA participate in the barrier.
  - `reductionOp` (attribute): specifies the reduction operation
    (`popc`, `and`, `or`).
  - `reductionPredicate` (operand): specifies the predicate to be used with
    the `reductionOp`.

  The barrier operation guarantees that when the barrier completes, prior memory
  accesses requested by participating threads are performed relative to all threads
  participating in the barrier. It also ensures that no new memory access is
  requested by participating threads before the barrier completes.

  When a barrier completes, the waiting threads are restarted without delay, and
  the barrier is reinitialized so that it can be immediately reused.

  This operation generates an aligned barrier, indicating that all threads in the CTA
  will execute the same barrier instruction. Behavior is undefined if all threads in the
  CTA do not reach this instruction.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
  """

  OPERATION_NAME = "nvvm.barrier"

  _ODS_OPERAND_SEGMENTS = [0,0,0,]

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, barrierId=None, numberOfThreads=None, reductionOp=None, reductionPredicate=None, loc=None, ip=None):
    """Build the op from its optional operands, attribute and result.

    The three operands are always passed in declaration order, with None
    standing in for an absent one. `res` is added to the results only when
    it is not None. A `reductionOp` that is not already an `Attribute` is
    converted through the registered `BarrierReductionAttr` builder when one
    exists; otherwise it is stored unchanged.
    """
    # One entry per operand segment, in declaration order.
    operands = [barrierId, numberOfThreads, reductionPredicate]
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if reductionOp is not None:
      passthrough = (
          isinstance(reductionOp, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('BarrierReductionAttr')
      )
      if passthrough:
        attributes["reductionOp"] = reductionOp
      else:
        build_reduction = _ods_ir.AttrBuilder.get('BarrierReductionAttr')
        attributes["reductionOp"] = build_reduction(reductionOp, context=_ods_context)
    results = [] if res is None else [res]
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=results,
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def barrierId(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """The operand in segment 0, or None when that segment is empty."""
    segment = _ods_segmented_accessor(
        self.operation.operands,
        self.operation.attributes["operandSegmentSizes"],
        0,
    )
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def numberOfThreads(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """The operand in segment 1, or None when that segment is empty."""
    segment = _ods_segmented_accessor(
        self.operation.operands,
        self.operation.attributes["operandSegmentSizes"],
        1,
    )
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def reductionPredicate(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """The operand in segment 2, or None when that segment is empty."""
    segment = _ods_segmented_accessor(
        self.operation.operands,
        self.operation.attributes["operandSegmentSizes"],
        2,
    )
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def reductionOp(self) -> _Optional[_ods_ir.Attribute]:
    """The `reductionOp` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["reductionOp"] if "reductionOp" in attrs else None

  @reductionOp.setter
  def reductionOp(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as `reductionOp`; None removes the attribute if present."""
    attrs = self.operation.attributes
    if value is None:
      if "reductionOp" in attrs:
        del attrs["reductionOp"]
      return
    attrs["reductionOp"] = value

  @reductionOp.deleter
  def reductionOp(self):
    """Delete the `reductionOp` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["reductionOp"]

  @builtins.property
  def res(self) -> _Optional[_ods_ir.OpResult[_ods_ir.IntegerType]]:
    """The first result, or None when the operation has no results."""
    op_results = self.operation.results
    if len(op_results) < 1:
      return None
    return op_results[0]

def barrier(res, *, barrier_id=None, number_of_threads=None, reduction_op=None, reduction_predicate=None, loc=None, ip=None) -> _Union[_ods_ir.OpResult, _ods_ir.OpResultList, BarrierOp]:
  """Create a `BarrierOp` and return its result(s), or the op when it has none.

  Returns the whole result list when there is more than one result, the
  single result when there is exactly one, and the op itself otherwise.
  """
  op = BarrierOp(
      res=res,
      barrierId=barrier_id,
      numberOfThreads=number_of_threads,
      reductionOp=reduction_op,
      reductionPredicate=reduction_predicate,
      loc=loc,
      ip=ip,
  )
  results = op.results
  count = len(results)
  if count > 1:
    return results
  if count == 1:
    return results[0]
  return op

@_ods_cext.register_operation(_Dialect)
class BlockDimXOp(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.ntid.x` operation.

  The op takes no operands, has one result (`res`) and an optional `range`
  attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.ntid.x"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the op with `res` as its only result and an optional `range`.

    A `range` that is not already an `Attribute` is converted through the
    registered `LLVM_ConstantRangeAttr` builder when one exists; otherwise
    it is stored unchanged. `loc` and `ip` are forwarded to `OpView`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if range is not None:
      passthrough = (
          isinstance(range, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if passthrough:
        attributes["range"] = range
      else:
        build_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        attributes["range"] = build_range(range, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as `range`; None removes the attribute if present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
      return
    attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_ntid_x(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Create a `BlockDimXOp` from the arguments and return its `result`."""
  op = BlockDimXOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class BlockDimYOp(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.ntid.y` operation.

  The op takes no operands, has one result (`res`) and an optional `range`
  attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.ntid.y"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the op with `res` as its only result and an optional `range`.

    A `range` that is not already an `Attribute` is converted through the
    registered `LLVM_ConstantRangeAttr` builder when one exists; otherwise
    it is stored unchanged. `loc` and `ip` are forwarded to `OpView`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if range is not None:
      passthrough = (
          isinstance(range, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if passthrough:
        attributes["range"] = range
      else:
        build_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        attributes["range"] = build_range(range, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as `range`; None removes the attribute if present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
      return
    attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_ntid_y(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Create a `BlockDimYOp` from the arguments and return its `result`."""
  op = BlockDimYOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class BlockDimZOp(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.ntid.z` operation.

  The op takes no operands, has one result (`res`) and an optional `range`
  attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.ntid.z"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the op with `res` as its only result and an optional `range`.

    A `range` that is not already an `Attribute` is converted through the
    registered `LLVM_ConstantRangeAttr` builder when one exists; otherwise
    it is stored unchanged. `loc` and `ip` are forwarded to `OpView`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if range is not None:
      passthrough = (
          isinstance(range, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if passthrough:
        attributes["range"] = range
      else:
        build_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        attributes["range"] = build_range(range, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as `range`; None removes the attribute if present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
      return
    attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_ntid_z(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Create a `BlockDimZOp` from the arguments and return its `result`."""
  op = BlockDimZOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class BlockIdXOp(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.ctaid.x` operation.

  The op takes no operands, has one result (`res`) and an optional `range`
  attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.ctaid.x"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the op with `res` as its only result and an optional `range`.

    A `range` that is not already an `Attribute` is converted through the
    registered `LLVM_ConstantRangeAttr` builder when one exists; otherwise
    it is stored unchanged. `loc` and `ip` are forwarded to `OpView`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if range is not None:
      passthrough = (
          isinstance(range, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if passthrough:
        attributes["range"] = range
      else:
        build_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        attributes["range"] = build_range(range, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as `range`; None removes the attribute if present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
      return
    attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_ctaid_x(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Create a `BlockIdXOp` from the arguments and return its `result`."""
  op = BlockIdXOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class BlockIdYOp(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.ctaid.y` operation.

  The op takes no operands, has one result (`res`) and an optional `range`
  attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.ctaid.y"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the op with `res` as its only result and an optional `range`.

    A `range` that is not already an `Attribute` is converted through the
    registered `LLVM_ConstantRangeAttr` builder when one exists; otherwise
    it is stored unchanged. `loc` and `ip` are forwarded to `OpView`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if range is not None:
      passthrough = (
          isinstance(range, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if passthrough:
        attributes["range"] = range
      else:
        build_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        attributes["range"] = build_range(range, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as `range`; None removes the attribute if present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
      return
    attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_ctaid_y(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Create a `BlockIdYOp` from the arguments and return its `result`."""
  op = BlockIdYOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class BlockIdZOp(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.ctaid.z` operation.

  The op takes no operands, has one result (`res`) and an optional `range`
  attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.ctaid.z"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the op with `res` as its only result and an optional `range`.

    A `range` that is not already an `Attribute` is converted through the
    registered `LLVM_ConstantRangeAttr` builder when one exists; otherwise
    it is stored unchanged. `loc` and `ip` are forwarded to `OpView`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if range is not None:
      passthrough = (
          isinstance(range, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if passthrough:
        attributes["range"] = range
      else:
        build_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        attributes["range"] = build_range(range, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as `range`; None removes the attribute if present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
      return
    attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_ctaid_z(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Create a `BlockIdZOp` from the arguments and return its `result`."""
  op = BlockIdZOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class BlockInClusterIdXOp(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.cluster.ctaid.x` operation.

  The op takes no operands, has one result (`res`) and an optional `range`
  attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.cluster.ctaid.x"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the op with `res` as its only result and an optional `range`.

    A `range` that is not already an `Attribute` is converted through the
    registered `LLVM_ConstantRangeAttr` builder when one exists; otherwise
    it is stored unchanged. `loc` and `ip` are forwarded to `OpView`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if range is not None:
      passthrough = (
          isinstance(range, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if passthrough:
        attributes["range"] = range
      else:
        build_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        attributes["range"] = build_range(range, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as `range`; None removes the attribute if present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
      return
    attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_cluster_ctaid_x(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Create a `BlockInClusterIdXOp` from the arguments and return its `result`."""
  op = BlockInClusterIdXOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class BlockInClusterIdYOp(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.cluster.ctaid.y` operation.

  The op takes no operands, has one result (`res`) and an optional `range`
  attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.cluster.ctaid.y"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the op with `res` as its only result and an optional `range`.

    A `range` that is not already an `Attribute` is converted through the
    registered `LLVM_ConstantRangeAttr` builder when one exists; otherwise
    it is stored unchanged. `loc` and `ip` are forwarded to `OpView`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if range is not None:
      passthrough = (
          isinstance(range, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if passthrough:
        attributes["range"] = range
      else:
        build_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        attributes["range"] = build_range(range, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as `range`; None removes the attribute if present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
      return
    attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_cluster_ctaid_y(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Create a `BlockInClusterIdYOp` from the arguments and return its `result`."""
  op = BlockInClusterIdYOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class BlockInClusterIdZOp(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.cluster.ctaid.z` operation.

  The op takes no operands, has one result (`res`) and an optional `range`
  attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.cluster.ctaid.z"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the op with `res` as its only result and an optional `range`.

    A `range` that is not already an `Attribute` is converted through the
    registered `LLVM_ConstantRangeAttr` builder when one exists; otherwise
    it is stored unchanged. `loc` and `ip` are forwarded to `OpView`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if range is not None:
      passthrough = (
          isinstance(range, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if passthrough:
        attributes["range"] = range
      else:
        build_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        attributes["range"] = build_range(range, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as `range`; None removes the attribute if present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
      return
    attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_cluster_ctaid_z(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Create a `BlockInClusterIdZOp` from the arguments and return its `result`."""
  op = BlockInClusterIdZOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class Breakpoint(_ods_ir.OpView):
  r"""
  Suspends execution of the program for debugging.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-brkpt)
  """

  OPERATION_NAME = "nvvm.breakpoint"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, loc=None, ip=None):
    """Build the op with no operands, no attributes and no results.

    `loc` and `ip` are forwarded unchanged to the `OpView` constructor.
    """
    # The context is looked up as in the other generated builders, although
    # this op has no attribute that consumes it.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

def breakpoint(*, loc=None, ip=None) -> Breakpoint:
  """Create a `Breakpoint` op, forwarding `loc` and `ip`, and return the op."""
  op = Breakpoint(loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class BulkStoreOp(_ods_ir.OpView):
  r"""
  Initializes a region of shared memory at the address given by `addr`.

  The `size` operand specifies the number of bytes to initialize and must be
  a multiple of 8. The `initVal` attribute specifies the value to initialize
  the memory to; the only supported value is 0.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-st-bulk)
  """

  OPERATION_NAME = "nvvm.st.bulk"

  _ODS_REGIONS = (0, True)

  def __init__(self, addr, size, *, initVal=None, loc=None, ip=None):
    """Build the op from operands `addr` and `size` and an optional `initVal`.

    An `initVal` that is not already an `Attribute` is converted through the
    registered `I64Attr` builder when one exists; otherwise it is stored
    unchanged. When `initVal` is None this builder sets no attribute.
    `loc` and `ip` are forwarded to the `OpView` constructor.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if initVal is not None:
      passthrough = (
          isinstance(initVal, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('I64Attr')
      )
      if passthrough:
        attributes["initVal"] = initVal
      else:
        build_i64 = _ods_ir.AttrBuilder.get('I64Attr')
        attributes["initVal"] = build_i64(initVal, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=[addr, size],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """The operand at index 0."""
    all_operands = self.operation.operands
    return all_operands[0]

  @builtins.property
  def size(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """The operand at index 1."""
    all_operands = self.operation.operands
    return all_operands[1]

  @builtins.property
  def initVal(self) -> _ods_ir.IntegerAttr:
    """The `initVal` entry looked up in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["initVal"]

  @initVal.setter
  def initVal(self, value: _ods_ir.IntegerAttr):
    """Replace the `initVal` attribute.

    Raises:
      ValueError: if `value` is None.
    """
    if value is not None:
      self.operation.attributes["initVal"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def st_bulk(addr, size, *, init_val=None, loc=None, ip=None) -> BulkStoreOp:
  """Create a `BulkStoreOp` from snake_case arguments and return the op."""
  op = BulkStoreOp(
      addr=addr,
      size=size,
      initVal=init_val,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Clock64Op(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.clock64` operation.

  The op takes no operands and no attributes and has one result (`res`).
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.clock64"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Build the op with `res` as its only result.

    `loc` and `ip` are forwarded unchanged to the `OpView` constructor.
    """
    # The context is looked up as in the other generated builders, although
    # this op has no attribute that consumes it.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_clock64(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Create a `Clock64Op` from the arguments and return its `result`."""
  op = Clock64Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClockOp(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.clock` operation.

  The op takes no operands and no attributes and has one result (`res`).
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.clock"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Build the op with `res` as its only result.

    `loc` and `ip` are forwarded unchanged to the `OpView` constructor.
    """
    # The context is looked up as in the other generated builders, although
    # this op has no attribute that consumes it.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_clock(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Create a `ClockOp` from the arguments and return its `result`."""
  op = ClockOp(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterArriveOp(_ods_ir.OpView):
  r"""
  The `cluster.arrive` can be used by the threads within the cluster for synchronization and
  communication. The `cluster.arrive` instruction marks the warps' arrival at the barrier
  without causing the executing thread to wait for other participating threads.

  The `aligned` attribute, when provided, generates the .aligned version of the PTX instruction.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
  """

  OPERATION_NAME = "nvvm.cluster.arrive"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, aligned=None, loc=None, ip=None):
    """Build the op, attaching a unit `aligned` attribute when `aligned` is truthy.

    `loc` and `ip` are forwarded unchanged to the `OpView` constructor.
    """
    operands = []
    attributes = {}
    regions = None
    _ods_context = _ods_get_default_loc_context(loc)
    if bool(aligned):
      # Reuse the context resolved above rather than resolving it a second
      # time for the same `loc`.
      attributes["aligned"] = _ods_ir.UnitAttr.get(_ods_context)
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def aligned(self) -> bool:
    """True when the operation carries an `aligned` attribute."""
    return "aligned" in self.operation.attributes

  @aligned.setter
  def aligned(self, value):
    """Set a unit `aligned` attribute for a truthy `value`, else remove it if present."""
    if bool(value):
      self.operation.attributes["aligned"] = _ods_ir.UnitAttr.get()
    elif "aligned" in self.operation.attributes:
      del self.operation.attributes["aligned"]

  @aligned.deleter
  def aligned(self):
    """Delete the `aligned` attribute from the operation."""
    del self.operation.attributes["aligned"]

def cluster_arrive(*, aligned=None, loc=None, ip=None) -> ClusterArriveOp:
  """Create a `ClusterArriveOp` from the arguments and return the op."""
  op = ClusterArriveOp(aligned=aligned, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class ClusterArriveRelaxedOp(_ods_ir.OpView):
  r"""
  The `cluster.arrive` can be used by the threads within the cluster for synchronization and
  communication. The `cluster.arrive` instruction marks the warps' arrival at the barrier
  without causing the executing thread to wait for other participating threads.

  The `aligned` attribute, when provided, generates the .aligned version of the PTX instruction.
  The .relaxed qualifier on `cluster.arrive` specifies that there are no memory
  ordering and visibility guarantees provided for the memory accesses performed prior to
  `cluster.arrive`.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
  """

  OPERATION_NAME = "nvvm.cluster.arrive.relaxed"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, aligned=None, loc=None, ip=None):
    """Build the op, attaching a unit `aligned` attribute when `aligned` is truthy.

    `loc` and `ip` are forwarded unchanged to the `OpView` constructor.
    """
    operands = []
    attributes = {}
    regions = None
    _ods_context = _ods_get_default_loc_context(loc)
    if bool(aligned):
      # Reuse the context resolved above rather than resolving it a second
      # time for the same `loc`.
      attributes["aligned"] = _ods_ir.UnitAttr.get(_ods_context)
    results = []
    _ods_successors = None
    super().__init__(self.OPERATION_NAME, self._ODS_REGIONS, self._ODS_OPERAND_SEGMENTS, self._ODS_RESULT_SEGMENTS, attributes=attributes, results=results, operands=operands, successors=_ods_successors, regions=regions, loc=loc, ip=ip)

  @builtins.property
  def aligned(self) -> bool:
    """True when the operation carries an `aligned` attribute."""
    return "aligned" in self.operation.attributes

  @aligned.setter
  def aligned(self, value):
    """Set a unit `aligned` attribute for a truthy `value`, else remove it if present."""
    if bool(value):
      self.operation.attributes["aligned"] = _ods_ir.UnitAttr.get()
    elif "aligned" in self.operation.attributes:
      del self.operation.attributes["aligned"]

  @aligned.deleter
  def aligned(self):
    """Delete the `aligned` attribute from the operation."""
    del self.operation.attributes["aligned"]

def cluster_arrive_relaxed(*, aligned=None, loc=None, ip=None) -> ClusterArriveRelaxedOp:
  """Create a `ClusterArriveRelaxedOp` from the arguments and return the op."""
  op = ClusterArriveRelaxedOp(aligned=aligned, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class ClusterDim(_ods_ir.OpView):
  """View of the `nvvm.read.ptx.sreg.cluster.nctarank` operation.

  The op takes no operands, has one result (`res`) and an optional `range`
  attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.cluster.nctarank"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the op with `res` as its only result and an optional `range`.

    A `range` that is not already an `Attribute` is converted through the
    registered `LLVM_ConstantRangeAttr` builder when one exists; otherwise
    it is stored unchanged. `loc` and `ip` are forwarded to `OpView`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if range is not None:
      passthrough = (
          isinstance(range, _ods_ir.Attribute)
          or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if passthrough:
        attributes["range"] = range
      else:
        build_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        attributes["range"] = build_range(range, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as `range`; None removes the attribute if present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
      return
    attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The result at index 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_cluster_nctarank(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterDim` op and return its `result`."""
  op = ClusterDim(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterDimBlocksXOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.cluster.nctaid.x` operation.

  The op is created without operands and with exactly one result. An optional
  `range` attribute may be supplied at construction time or managed afterwards
  through the `range` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.cluster.nctaid.x"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    `res` becomes the sole entry of the result list handed to the base
    constructor (presumably the result type -- confirm against `OpView`).
    A `range` that is already an `Attribute` is stored unchanged; any other
    non-None value goes through the `LLVM_ConstantRangeAttr` builder when one
    is registered and is stored unchanged otherwise.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        op_attributes["range"] = range
      else:
        make_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        op_attributes["range"] = make_range(range, context=context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as the `range` attribute; None removes it if present."""
    if value is None:
      attrs = self.operation.attributes
      if "range" in attrs:
        del attrs["range"]
      return
    self.operation.attributes["range"] = value

  @range.deleter
  def range(self):
    """Remove the `range` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def read_ptx_sreg_cluster_nctaid_x(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterDimBlocksXOp` op and return its `result`."""
  op = ClusterDimBlocksXOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterDimBlocksYOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.cluster.nctaid.y` operation.

  The op is created without operands and with exactly one result. An optional
  `range` attribute may be supplied at construction time or managed afterwards
  through the `range` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.cluster.nctaid.y"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    `res` becomes the sole entry of the result list handed to the base
    constructor (presumably the result type -- confirm against `OpView`).
    A `range` that is already an `Attribute` is stored unchanged; any other
    non-None value goes through the `LLVM_ConstantRangeAttr` builder when one
    is registered and is stored unchanged otherwise.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        op_attributes["range"] = range
      else:
        make_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        op_attributes["range"] = make_range(range, context=context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as the `range` attribute; None removes it if present."""
    if value is None:
      attrs = self.operation.attributes
      if "range" in attrs:
        del attrs["range"]
      return
    self.operation.attributes["range"] = value

  @range.deleter
  def range(self):
    """Remove the `range` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def read_ptx_sreg_cluster_nctaid_y(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterDimBlocksYOp` op and return its `result`."""
  op = ClusterDimBlocksYOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterDimBlocksZOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.cluster.nctaid.z` operation.

  The op is created without operands and with exactly one result. An optional
  `range` attribute may be supplied at construction time or managed afterwards
  through the `range` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.cluster.nctaid.z"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    `res` becomes the sole entry of the result list handed to the base
    constructor (presumably the result type -- confirm against `OpView`).
    A `range` that is already an `Attribute` is stored unchanged; any other
    non-None value goes through the `LLVM_ConstantRangeAttr` builder when one
    is registered and is stored unchanged otherwise.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        op_attributes["range"] = range
      else:
        make_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        op_attributes["range"] = make_range(range, context=context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as the `range` attribute; None removes it if present."""
    if value is None:
      attrs = self.operation.attributes
      if "range" in attrs:
        del attrs["range"]
      return
    self.operation.attributes["range"] = value

  @range.deleter
  def range(self):
    """Remove the `range` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def read_ptx_sreg_cluster_nctaid_z(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterDimBlocksZOp` op and return its `result`."""
  op = ClusterDimBlocksZOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterDimXOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.nclusterid.x` operation.

  The op is created without operands and with exactly one result. An optional
  `range` attribute may be supplied at construction time or managed afterwards
  through the `range` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.nclusterid.x"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    `res` becomes the sole entry of the result list handed to the base
    constructor (presumably the result type -- confirm against `OpView`).
    A `range` that is already an `Attribute` is stored unchanged; any other
    non-None value goes through the `LLVM_ConstantRangeAttr` builder when one
    is registered and is stored unchanged otherwise.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        op_attributes["range"] = range
      else:
        make_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        op_attributes["range"] = make_range(range, context=context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as the `range` attribute; None removes it if present."""
    if value is None:
      attrs = self.operation.attributes
      if "range" in attrs:
        del attrs["range"]
      return
    self.operation.attributes["range"] = value

  @range.deleter
  def range(self):
    """Remove the `range` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def read_ptx_sreg_nclusterid_x(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterDimXOp` op and return its `result`."""
  op = ClusterDimXOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterDimYOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.nclusterid.y` operation.

  The op is created without operands and with exactly one result. An optional
  `range` attribute may be supplied at construction time or managed afterwards
  through the `range` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.nclusterid.y"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    `res` becomes the sole entry of the result list handed to the base
    constructor (presumably the result type -- confirm against `OpView`).
    A `range` that is already an `Attribute` is stored unchanged; any other
    non-None value goes through the `LLVM_ConstantRangeAttr` builder when one
    is registered and is stored unchanged otherwise.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        op_attributes["range"] = range
      else:
        make_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        op_attributes["range"] = make_range(range, context=context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as the `range` attribute; None removes it if present."""
    if value is None:
      attrs = self.operation.attributes
      if "range" in attrs:
        del attrs["range"]
      return
    self.operation.attributes["range"] = value

  @range.deleter
  def range(self):
    """Remove the `range` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def read_ptx_sreg_nclusterid_y(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterDimYOp` op and return its `result`."""
  op = ClusterDimYOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterDimZOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.nclusterid.z` operation.

  The op is created without operands and with exactly one result. An optional
  `range` attribute may be supplied at construction time or managed afterwards
  through the `range` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.nclusterid.z"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    `res` becomes the sole entry of the result list handed to the base
    constructor (presumably the result type -- confirm against `OpView`).
    A `range` that is already an `Attribute` is stored unchanged; any other
    non-None value goes through the `LLVM_ConstantRangeAttr` builder when one
    is registered and is stored unchanged otherwise.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        op_attributes["range"] = range
      else:
        make_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        op_attributes["range"] = make_range(range, context=context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as the `range` attribute; None removes it if present."""
    if value is None:
      attrs = self.operation.attributes
      if "range" in attrs:
        del attrs["range"]
      return
    self.operation.attributes["range"] = value

  @range.deleter
  def range(self):
    """Remove the `range` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def read_ptx_sreg_nclusterid_z(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterDimZOp` op and return its `result`."""
  op = ClusterDimZOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterId(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.cluster.ctarank` operation.

  The op is created without operands and with exactly one result. An optional
  `range` attribute may be supplied at construction time or managed afterwards
  through the `range` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.cluster.ctarank"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    `res` becomes the sole entry of the result list handed to the base
    constructor (presumably the result type -- confirm against `OpView`).
    A `range` that is already an `Attribute` is stored unchanged; any other
    non-None value goes through the `LLVM_ConstantRangeAttr` builder when one
    is registered and is stored unchanged otherwise.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        op_attributes["range"] = range
      else:
        make_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        op_attributes["range"] = make_range(range, context=context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as the `range` attribute; None removes it if present."""
    if value is None:
      attrs = self.operation.attributes
      if "range" in attrs:
        del attrs["range"]
      return
    self.operation.attributes["range"] = value

  @range.deleter
  def range(self):
    """Remove the `range` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def read_ptx_sreg_cluster_ctarank(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterId` op and return its `result`."""
  op = ClusterId(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterIdXOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.clusterid.x` operation.

  The op is created without operands and with exactly one result. An optional
  `range` attribute may be supplied at construction time or managed afterwards
  through the `range` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.clusterid.x"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    `res` becomes the sole entry of the result list handed to the base
    constructor (presumably the result type -- confirm against `OpView`).
    A `range` that is already an `Attribute` is stored unchanged; any other
    non-None value goes through the `LLVM_ConstantRangeAttr` builder when one
    is registered and is stored unchanged otherwise.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        op_attributes["range"] = range
      else:
        make_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        op_attributes["range"] = make_range(range, context=context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as the `range` attribute; None removes it if present."""
    if value is None:
      attrs = self.operation.attributes
      if "range" in attrs:
        del attrs["range"]
      return
    self.operation.attributes["range"] = value

  @range.deleter
  def range(self):
    """Remove the `range` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def read_ptx_sreg_clusterid_x(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterIdXOp` op and return its `result`."""
  op = ClusterIdXOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterIdYOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.clusterid.y` operation.

  The op is created without operands and with exactly one result. An optional
  `range` attribute may be supplied at construction time or managed afterwards
  through the `range` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.clusterid.y"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    `res` becomes the sole entry of the result list handed to the base
    constructor (presumably the result type -- confirm against `OpView`).
    A `range` that is already an `Attribute` is stored unchanged; any other
    non-None value goes through the `LLVM_ConstantRangeAttr` builder when one
    is registered and is stored unchanged otherwise.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        op_attributes["range"] = range
      else:
        make_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        op_attributes["range"] = make_range(range, context=context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as the `range` attribute; None removes it if present."""
    if value is None:
      attrs = self.operation.attributes
      if "range" in attrs:
        del attrs["range"]
      return
    self.operation.attributes["range"] = value

  @range.deleter
  def range(self):
    """Remove the `range` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def read_ptx_sreg_clusterid_y(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterIdYOp` op and return its `result`."""
  op = ClusterIdYOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterIdZOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.clusterid.z` operation.

  The op is created without operands and with exactly one result. An optional
  `range` attribute may be supplied at construction time or managed afterwards
  through the `range` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.clusterid.z"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    `res` becomes the sole entry of the result list handed to the base
    constructor (presumably the result type -- confirm against `OpView`).
    A `range` that is already an `Attribute` is stored unchanged; any other
    non-None value goes through the `LLVM_ConstantRangeAttr` builder when one
    is registered and is stored unchanged otherwise.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        op_attributes["range"] = range
      else:
        make_range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')
        op_attributes["range"] = make_range(range, context=context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or None when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Store `value` as the `range` attribute; None removes it if present."""
    if value is None:
      attrs = self.operation.attributes
      if "range" in attrs:
        del attrs["range"]
      return
    self.operation.attributes["range"] = value

  @range.deleter
  def range(self):
    """Remove the `range` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def read_ptx_sreg_clusterid_z(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterIdZOp` op and return its `result`."""
  op = ClusterIdZOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterLaunchControlQueryCancelOp(_ods_ir.OpView):
  r"""
  `clusterlaunchcontrol.query.cancel` queries the response of the
  `clusterlaunchcontrol.try.cancel` operation given by operand
  `try_cancel_response`.

  `query_type` selects the query to perform and can be one of the following:
  - `is_canceled` : Returns true if the try cancel request succeeded,
  and false otherwise.
  - `get_first_cta_id_{x/y/z}` : Returns the x, y, or z coordinate of the
  first CTA in the canceled cluster. Behaviour is defined only if the try
  cancel request succeeded.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-query-cancel)
  """

  OPERATION_NAME = "nvvm.clusterlaunchcontrol.query.cancel"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, query_type, try_cancel_response, *, loc=None, ip=None):
    """Create the op.

    `try_cancel_response` is the only operand and `res` the only entry of the
    result list. A `query_type` that is already an `Attribute` is stored
    unchanged; any other value goes through the
    `ClusterLaunchControlQueryTypeAttr` builder when one is registered and is
    stored unchanged otherwise.
    """
    context = _ods_get_default_loc_context(loc)
    if isinstance(query_type, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('ClusterLaunchControlQueryTypeAttr'):
      query_type_attr = query_type
    else:
      make_query_type = _ods_ir.AttrBuilder.get('ClusterLaunchControlQueryTypeAttr')
      query_type_attr = make_query_type(query_type, context=context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={"query_type": query_type_attr},
        results=[res],
        operands=[try_cancel_response],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def try_cancel_response(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """The op's operand at index 0."""
    return self.operation.operands[0]

  @builtins.property
  def query_type(self) -> _ods_ir.Attribute:
    """The `query_type` attribute, looked up directly on the op."""
    return self.operation.attributes["query_type"]

  @query_type.setter
  def query_type(self, value: _ods_ir.Attribute):
    """Replace the `query_type` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["query_type"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def clusterlaunchcontrol_query_cancel(res, query_type, try_cancel_response, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ClusterLaunchControlQueryCancelOp` and return its `result`."""
  op = ClusterLaunchControlQueryCancelOp(
      res=res,
      query_type=query_type,
      try_cancel_response=try_cancel_response,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class ClusterLaunchControlTryCancelOp(_ods_ir.OpView):
  r"""
  `clusterlaunchcontrol.try.cancel` requests atomically canceling the launch
  of a cluster that has not started running yet. An opaque response indicating
  whether the operation succeeded or failed is written asynchronously to
  shared memory.

  Operand `smemAddress` specifies the naturally aligned address of the
  16-byte wide shared memory location where the request's response is written.

  Operand `mbarrier` specifies the mbarrier object used to track the
  completion of the asynchronous operation.

  If `multicast` is specified, the response is asynchronously written to the
  corresponding local shared memory location (specified by `addr`) of each CTA
  in the requesting cluster.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-try-cancel)
  """

  OPERATION_NAME = "nvvm.clusterlaunchcontrol.try.cancel"

  _ODS_REGIONS = (0, True)

  def __init__(self, smemAddress, mbarrier, *, multicast=None, loc=None, ip=None):
    """Create the op with operands `smemAddress` and `mbarrier`, in that order.

    A truthy `multicast` attaches the `multicast` unit attribute. `loc` and
    `ip` are forwarded unchanged to the base constructor.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if multicast:
      op_attributes["multicast"] = _ods_ir.UnitAttr.get(context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[],
        operands=[smemAddress, mbarrier],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def smemAddress(self) -> _ods_ir.Value:
    """The op's operand at index 0."""
    return self.operation.operands[0]

  @builtins.property
  def mbarrier(self) -> _ods_ir.Value:
    """The op's operand at index 1."""
    return self.operation.operands[1]

  @builtins.property
  def multicast(self) -> bool:
    """Whether the `multicast` attribute is present on the op."""
    return "multicast" in self.operation.attributes

  @multicast.setter
  def multicast(self, value):
    """Attach the `multicast` unit attribute for a truthy value, drop it otherwise."""
    if not value:
      attrs = self.operation.attributes
      if "multicast" in attrs:
        del attrs["multicast"]
      return
    self.operation.attributes["multicast"] = _ods_ir.UnitAttr.get()

  @multicast.deleter
  def multicast(self):
    """Remove the `multicast` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["multicast"]

def clusterlaunchcontrol_try_cancel(smem_address, mbarrier, *, multicast=None, loc=None, ip=None) -> ClusterLaunchControlTryCancelOp:
  """Construct a `ClusterLaunchControlTryCancelOp` and return it.

  `smem_address` is passed on as the op constructor's `smemAddress` argument.
  """
  op = ClusterLaunchControlTryCancelOp(
      smemAddress=smem_address,
      mbarrier=mbarrier,
      multicast=multicast,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class ClusterWaitOp(_ods_ir.OpView):
  r"""
  `cluster.wait` makes the executing thread wait until all non-exited threads
  of the cluster have performed `cluster.arrive`. When the `aligned` attribute
  is provided, the .aligned version of the PTX instruction is generated.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
  """

  OPERATION_NAME = "nvvm.cluster.wait"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, aligned=None, loc=None, ip=None):
    """Create the op; a truthy `aligned` attaches the `aligned` unit attribute.

    `loc` and `ip` are forwarded unchanged to the base constructor.
    """
    context = _ods_get_default_loc_context(loc)
    op_attributes = {}
    if aligned:
      op_attributes["aligned"] = _ods_ir.UnitAttr.get(context)
    # The op is built with no operands, no results, no successors and no regions.
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def aligned(self) -> bool:
    """Whether the `aligned` attribute is present on the op."""
    return "aligned" in self.operation.attributes

  @aligned.setter
  def aligned(self, value):
    """Attach the `aligned` unit attribute for a truthy value, drop it otherwise."""
    if not value:
      attrs = self.operation.attributes
      if "aligned" in attrs:
        del attrs["aligned"]
      return
    self.operation.attributes["aligned"] = _ods_ir.UnitAttr.get()

  @aligned.deleter
  def aligned(self):
    """Remove the `aligned` attribute from the op."""
    attrs = self.operation.attributes
    del attrs["aligned"]

def cluster_wait(*, aligned=None, loc=None, ip=None) -> ClusterWaitOp:
  """Construct a `ClusterWaitOp` from the given arguments and return it."""
  op = ClusterWaitOp(aligned=aligned, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class ConvertBF16x2ToF8x2Op(_ods_ir.OpView):
  r"""
  Converts the bf16 inputs held in a bf16x2 vector to the specified f8 type.

  The result `dst` is represented either as an i16 type or as a vector of two
  i8 types.
  When `dst` is returned as an i16 type, the converted values from `a` are
  packed so that the value converted from the first element of `a` is stored
  in the upper 8 bits of `dst` and the value converted from the second element
  of `a` is stored in the lower 8 bits of `dst`.
  When `dst` is returned as a vector type, each converted value is stored as an
  i8 element in the vector.
  The `rnd` and `sat` attributes specify the rounding and saturation modes
  respectively.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.bf16x2.to.f8x2"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, a, dstTy, *, rnd=None, sat=None, loc=None, ip=None):
    """Create the op with operand `a` and `dst` as the sole result-list entry.

    `dstTy` is always stored as the `dstTy` attribute; `rnd` and `sat` are
    stored only when they are not None. Each of the three is kept unchanged if
    it is already an `Attribute` or if no builder is registered under the
    matching name (`TypeAttr`, `FPRoundingModeAttr`, `SaturationModeAttr`);
    otherwise the registered builder converts it.
    """
    context = _ods_get_default_loc_context(loc)

    def to_attr(value, builder_name):
      # Ready-made attributes and values without a registered builder pass through.
      if isinstance(value, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains(builder_name):
        return value
      return _ods_ir.AttrBuilder.get(builder_name)(value, context=context)

    op_attributes = {}
    if rnd is not None:
      op_attributes["rnd"] = to_attr(rnd, 'FPRoundingModeAttr')
    if sat is not None:
      op_attributes["sat"] = to_attr(sat, 'SaturationModeAttr')
    op_attributes["dstTy"] = to_attr(dstTy, 'TypeAttr')
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[dst],
        operands=[a],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def a(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """The op's operand at index 0."""
    return self.operation.operands[0]

  @builtins.property
  def rnd(self) -> _ods_ir.Attribute:
    """The `rnd` attribute, looked up directly on the op."""
    return self.operation.attributes["rnd"]

  @rnd.setter
  def rnd(self, value: _ods_ir.Attribute):
    """Replace the `rnd` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["rnd"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def sat(self) -> _ods_ir.Attribute:
    """The `sat` attribute, looked up directly on the op."""
    return self.operation.attributes["sat"]

  @sat.setter
  def sat(self, value: _ods_ir.Attribute):
    """Replace the `sat` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["sat"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dstTy(self) -> _ods_ir.TypeAttr:
    """The `dstTy` attribute, looked up directly on the op."""
    return self.operation.attributes["dstTy"]

  @dstTy.setter
  def dstTy(self, value: _ods_ir.TypeAttr):
    """Replace the `dstTy` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["dstTy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult:
    """The op's result at index 0."""
    return self.operation.results[0]

def convert_bf16x2_to_f8x2(dst, a, dst_ty, *, rnd=None, sat=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ConvertBF16x2ToF8x2Op` and return its `result`.

  `dst_ty` is passed on as the op constructor's `dstTy` argument.
  """
  op = ConvertBF16x2ToF8x2Op(
      dst=dst,
      a=a,
      dstTy=dst_ty,
      rnd=rnd,
      sat=sat,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF4x2ToF16x2Op(_ods_ir.OpView):
  r"""
  Converts the f4 inputs held in a packed i8 to f16.

  The result `dst` is represented as a vector of f16 elements.
  When the `relu` attribute is set, the op lowers to the '.relu' variant of
  the cvt instruction.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f4x2.to.f16x2"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, src, srcType, *, relu=None, loc=None, ip=None):
    """Create the op with operand `src` and `dst` as the sole result-list entry.

    `srcType` is always stored as the `srcType` attribute; `relu` is stored
    only when it is not None. Each is kept unchanged if it is already an
    `Attribute` or if no builder is registered under the matching name
    (`TypeAttr`, `BoolAttr`); otherwise the registered builder converts it.
    """
    context = _ods_get_default_loc_context(loc)

    def to_attr(value, builder_name):
      # Ready-made attributes and values without a registered builder pass through.
      if isinstance(value, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains(builder_name):
        return value
      return _ods_ir.AttrBuilder.get(builder_name)(value, context=context)

    op_attributes = {}
    if relu is not None:
      op_attributes["relu"] = to_attr(relu, 'BoolAttr')
    op_attributes["srcType"] = to_attr(srcType, 'TypeAttr')
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[dst],
        operands=[src],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def src(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """The op's operand at index 0."""
    return self.operation.operands[0]

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """The `relu` attribute, looked up directly on the op."""
    return self.operation.attributes["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    """Replace the `relu` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def srcType(self) -> _ods_ir.TypeAttr:
    """The `srcType` attribute, looked up directly on the op."""
    return self.operation.attributes["srcType"]

  @srcType.setter
  def srcType(self, value: _ods_ir.TypeAttr):
    """Replace the `srcType` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["srcType"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult[_ods_ir.VectorType]:
    """The op's result at index 0."""
    return self.operation.results[0]

def convert_f4x2_to_f16x2(dst, src, src_type, *, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ConvertF4x2ToF16x2Op` and return its `result`.

  `src_type` is passed on as the op constructor's `srcType` argument.
  """
  op = ConvertF4x2ToF16x2Op(
      dst=dst,
      src=src,
      srcType=src_type,
      relu=relu,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF6x2ToF16x2Op(_ods_ir.OpView):
  r"""
  Converts the given f6 inputs, held in an i8x2 vector, to f16.

  The result `dst` is represented as a vector of f16 elements.
  When the `relu` attribute is set, the op lowers to the '.relu' variant
  of the cvt instruction.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f6x2.to.f16x2"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, src, srcType, *, relu=None, loc=None, ip=None):
    """Create the op with result type `dst`, operand `src` and its attributes.

    `relu` and `srcType` are passed through unchanged when they already are
    `Attribute` instances or when no builder is registered for their kind;
    otherwise the registered builder converts them.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    # `relu` is only attached when the caller supplies it.
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    # `srcType` is always attached.
    if not isinstance(srcType, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TypeAttr'):
      srcType = _ods_ir.AttrBuilder.get('TypeAttr')(srcType, context=ctx)
    attrs["srcType"] = srcType
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=[src],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def src(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attr_map = self.operation.attributes
    return attr_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def srcType(self) -> _ods_ir.TypeAttr:
    """Return the attribute stored under the "srcType" key."""
    attr_map = self.operation.attributes
    return attr_map["srcType"]

  @srcType.setter
  def srcType(self, value: _ods_ir.TypeAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["srcType"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult[_ods_ir.VectorType]:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f6x2_to_f16x2(dst, src, src_type, *, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build a `ConvertF6x2ToF16x2Op` and return its `result`.

  `src_type` is forwarded as the op's `srcType` argument; all other
  arguments are forwarded under their own names.
  """
  op = ConvertF6x2ToF16x2Op(
      dst=dst, src=src, srcType=src_type, relu=relu, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF8x2ToBF16x2Op(_ods_ir.OpView):
  r"""
  Converts the given f8 inputs, held in an i8x2 vector, to bf16.

  The result `dst` is represented as a vector of bf16 elements.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f8x2.to.bf16x2"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, src, srcType, *, loc=None, ip=None):
    """Create the op with result type `dst`, operand `src` and `srcType`.

    `srcType` is passed through unchanged when it already is an `Attribute`
    or when no 'TypeAttr' builder is registered; otherwise the registered
    builder converts it.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    # `srcType` is always attached.
    if not isinstance(srcType, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TypeAttr'):
      srcType = _ods_ir.AttrBuilder.get('TypeAttr')(srcType, context=ctx)
    attrs["srcType"] = srcType
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=[src],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def src(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def srcType(self) -> _ods_ir.TypeAttr:
    """Return the attribute stored under the "srcType" key."""
    attr_map = self.operation.attributes
    return attr_map["srcType"]

  @srcType.setter
  def srcType(self, value: _ods_ir.TypeAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["srcType"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult[_ods_ir.VectorType]:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f8x2_to_bf16x2(dst, src, src_type, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build a `ConvertF8x2ToBF16x2Op` and return its `result`.

  `src_type` is forwarded as the op's `srcType` argument; all other
  arguments are forwarded under their own names.
  """
  op = ConvertF8x2ToBF16x2Op(
      dst=dst, src=src, srcType=src_type, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF8x2ToF16x2Op(_ods_ir.OpView):
  r"""
  Converts the given f8 inputs, held in an i8x2 vector, to f16.

  The result `dst` is represented as a vector of f16 elements.
  When the `relu` attribute is set, the op lowers to the '.relu' variant
  of the cvt instruction.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f8x2.to.f16x2"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, src, srcType, *, relu=None, loc=None, ip=None):
    """Create the op with result type `dst`, operand `src` and its attributes.

    `relu` and `srcType` are passed through unchanged when they already are
    `Attribute` instances or when no builder is registered for their kind;
    otherwise the registered builder converts them.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    # `relu` is only attached when the caller supplies it.
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    # `srcType` is always attached.
    if not isinstance(srcType, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TypeAttr'):
      srcType = _ods_ir.AttrBuilder.get('TypeAttr')(srcType, context=ctx)
    attrs["srcType"] = srcType
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=[src],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def src(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attr_map = self.operation.attributes
    return attr_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def srcType(self) -> _ods_ir.TypeAttr:
    """Return the attribute stored under the "srcType" key."""
    attr_map = self.operation.attributes
    return attr_map["srcType"]

  @srcType.setter
  def srcType(self, value: _ods_ir.TypeAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["srcType"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult[_ods_ir.VectorType]:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f8x2_to_f16x2(dst, src, src_type, *, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build a `ConvertF8x2ToF16x2Op` and return its `result`.

  `src_type` is forwarded as the op's `srcType` argument; all other
  arguments are forwarded under their own names.
  """
  op = ConvertF8x2ToF16x2Op(
      dst=dst, src=src, srcType=src_type, relu=relu, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF16x2ToF8x2Op(_ods_ir.OpView):
  r"""
  Converts the given f16 inputs in an f16x2 vector to the specified f8 type.

  The result `dst` is represented as an i16 type or as a vector of two i8
  types.
  If `dst` is returned as an i16 type, the converted values from `a` are
  packed such that the value converted from the first element of `a` is
  stored in the upper 8 bits of `dst` and the value converted from the
  second element of `a` is stored in the lower 8 bits of `dst`.
  If `dst` is returned as a vector type, each converted value is stored as
  an i8 element in the vector.
  The `relu` attribute, when set, lowers to the '.relu' variant of the cvt
  instruction.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f16x2.to.f8x2"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, a, dstTy, *, relu=None, loc=None, ip=None):
    """Create the op with result type `dst`, operand `a` and its attributes.

    `relu` and `dstTy` are passed through unchanged when they already are
    `Attribute` instances or when no builder is registered for their kind;
    otherwise the registered builder converts them.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    # `relu` is only attached when the caller supplies it.
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    # `dstTy` is always attached.
    if not isinstance(dstTy, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TypeAttr'):
      dstTy = _ods_ir.AttrBuilder.get('TypeAttr')(dstTy, context=ctx)
    attrs["dstTy"] = dstTy
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=[a],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def a(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attr_map = self.operation.attributes
    return attr_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dstTy(self) -> _ods_ir.TypeAttr:
    """Return the attribute stored under the "dstTy" key."""
    attr_map = self.operation.attributes
    return attr_map["dstTy"]

  @dstTy.setter
  def dstTy(self, value: _ods_ir.TypeAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["dstTy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f16x2_to_f8x2(dst, a, dst_ty, *, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build a `ConvertF16x2ToF8x2Op` and return its `result`.

  `dst_ty` is forwarded as the op's `dstTy` argument; all other arguments
  are forwarded under their own names.
  """
  op = ConvertF16x2ToF8x2Op(
      dst=dst, a=a, dstTy=dst_ty, relu=relu, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF32x2ToBF16x2Op(_ods_ir.OpView):
  r"""
  Converts two F32 values to packed bf16x2 format with the specified
  rounding mode. The `src_hi` and `src_lo` parameters correspond to
  operands `a` and `b` in the PTX ISA, respectively.

  The `random_bits` parameter is required for stochastic rounding and
  provides the [random bits](https://docs.nvidia.com/cuda/parallel-thread-execution/#cvt-rs-rbits-layout-bf16) to be used for the conversion.

  The `relu` attribute clamps negative results to 0.

  The `sat` attribute determines saturation behavior.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f32x2.to.bf16x2"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, src_hi, src_lo, *, random_bits=None, rnd=None, sat=None, relu=None, loc=None, ip=None):
    """Create the op with result type `dst` and operands `src_hi`, `src_lo`.

    `random_bits` becomes a third operand only when it is not None. Each of
    `rnd`, `sat` and `relu` is attached only when not None, and is passed
    through unchanged when it already is an `Attribute` or when no builder
    is registered for its kind; otherwise the registered builder converts it.
    """
    operand_values = [src_hi, src_lo]
    if random_bits is not None:
      operand_values.append(random_bits)
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if rnd is not None:
      if not isinstance(rnd, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('FPRoundingModeAttr'):
        rnd = _ods_ir.AttrBuilder.get('FPRoundingModeAttr')(rnd, context=ctx)
      attrs["rnd"] = rnd
    if sat is not None:
      if not isinstance(sat, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('SaturationModeAttr'):
        sat = _ods_ir.AttrBuilder.get('SaturationModeAttr')(sat, context=ctx)
      attrs["sat"] = sat
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=operand_values,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def src_hi(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def src_lo(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return operand 1 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def random_bits(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return operand 2, or None when the op has fewer than three operands."""
    operand_list = self.operation.operands
    if len(operand_list) < 3:
      return None
    return operand_list[2]

  @builtins.property
  def rnd(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "rnd" key."""
    attr_map = self.operation.attributes
    return attr_map["rnd"]

  @rnd.setter
  def rnd(self, value: _ods_ir.Attribute):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["rnd"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def sat(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "sat" key."""
    attr_map = self.operation.attributes
    return attr_map["sat"]

  @sat.setter
  def sat(self, value: _ods_ir.Attribute):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["sat"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attr_map = self.operation.attributes
    return attr_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult[_ods_ir.VectorType]:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f32x2_to_bf16x2(dst, src_hi, src_lo, *, random_bits=None, rnd=None, sat=None, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build a `ConvertF32x2ToBF16x2Op` and return its `result`.

  Every argument is forwarded to the op constructor under its own name.
  """
  op = ConvertF32x2ToBF16x2Op(
      dst=dst,
      src_hi=src_hi,
      src_lo=src_lo,
      random_bits=random_bits,
      rnd=rnd,
      sat=sat,
      relu=relu,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF32x2ToF4x2Op(_ods_ir.OpView):
  r"""
  Converts each of the given float inputs to the specified fp4 type.

  The result `dst` is returned as an i8 type where the converted values are
  packed such that the value converted from `a` is stored in the upper 4
  bits of `dst` and the value converted from `b` is stored in the lower 4
  bits of `dst`.
  The `relu` attribute, when set, lowers to the '.relu' variant of the cvt
  instruction.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f32x2.to.f4x2"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, a, b, dstTy, *, relu=None, loc=None, ip=None):
    """Create the op with result type `dst`, operands `a`, `b` and attributes.

    `relu` and `dstTy` are passed through unchanged when they already are
    `Attribute` instances or when no builder is registered for their kind;
    otherwise the registered builder converts them.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    # `relu` is only attached when the caller supplies it.
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    # `dstTy` is always attached.
    if not isinstance(dstTy, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TypeAttr'):
      dstTy = _ods_ir.AttrBuilder.get('TypeAttr')(dstTy, context=ctx)
    attrs["dstTy"] = dstTy
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=[a, b],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def a(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def b(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return operand 1 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attr_map = self.operation.attributes
    return attr_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dstTy(self) -> _ods_ir.TypeAttr:
    """Return the attribute stored under the "dstTy" key."""
    attr_map = self.operation.attributes
    return attr_map["dstTy"]

  @dstTy.setter
  def dstTy(self, value: _ods_ir.TypeAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["dstTy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f32x2_to_f4x2(dst, a, b, dst_ty, *, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build a `ConvertF32x2ToF4x2Op` and return its `result`.

  `dst_ty` is forwarded as the op's `dstTy` argument; all other arguments
  are forwarded under their own names.
  """
  op = ConvertF32x2ToF4x2Op(
      dst=dst, a=a, b=b, dstTy=dst_ty, relu=relu, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF32x2ToF6x2Op(_ods_ir.OpView):
  r"""
  Converts each of the given float inputs to the specified fp6 type.

  The result `dst` is represented either as an i16 type or as a vector of
  two i8 types.
  If `dst` is returned as an i16 type, the converted values are packed such
  that the value converted from `a` is stored in the upper 8 bits of `dst`
  with 2 MSB bits padded with zeros and the value converted from `b` is
  stored in the lower 8 bits of `dst` with 2 MSB bits padded with zeros.
  If `dst` is returned as a vector type, each converted value is stored as
  an i8 element in the vector.
  The `relu` attribute, when set, lowers to the '.relu' variant of the cvt
  instruction.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f32x2.to.f6x2"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, a, b, dstTy, *, relu=None, loc=None, ip=None):
    """Create the op with result type `dst`, operands `a`, `b` and attributes.

    `relu` and `dstTy` are passed through unchanged when they already are
    `Attribute` instances or when no builder is registered for their kind;
    otherwise the registered builder converts them.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    # `relu` is only attached when the caller supplies it.
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    # `dstTy` is always attached.
    if not isinstance(dstTy, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TypeAttr'):
      dstTy = _ods_ir.AttrBuilder.get('TypeAttr')(dstTy, context=ctx)
    attrs["dstTy"] = dstTy
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=[a, b],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def a(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def b(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return operand 1 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attr_map = self.operation.attributes
    return attr_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dstTy(self) -> _ods_ir.TypeAttr:
    """Return the attribute stored under the "dstTy" key."""
    attr_map = self.operation.attributes
    return attr_map["dstTy"]

  @dstTy.setter
  def dstTy(self, value: _ods_ir.TypeAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["dstTy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f32x2_to_f6x2(dst, a, b, dst_ty, *, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build a `ConvertF32x2ToF6x2Op` and return its `result`.

  `dst_ty` is forwarded as the op's `dstTy` argument; all other arguments
  are forwarded under their own names.
  """
  op = ConvertF32x2ToF6x2Op(
      dst=dst, a=a, b=b, dstTy=dst_ty, relu=relu, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF32x2ToF8x2Op(_ods_ir.OpView):
  r"""
  Converts each of the given float inputs to the specified fp8 type.

  The result `dst` is represented as an i16 type or as a vector of two i8
  types.
  If `dst` is returned as an i16 type, the converted values are packed such
  that the value converted from `a` is stored in the upper 8 bits of `dst`
  and the value converted from `b` is stored in the lower 8 bits of `dst`.
  If `dst` is returned as a vector type, each converted value is stored as
  an i8 element in the vector.
  The `rnd` and `sat` attributes specify the rounding and saturation modes
  respectively.
  The `relu` attribute, when set, lowers to the '.relu' variant of the cvt
  instruction.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f32x2.to.f8x2"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, a, b, dstTy, *, rnd=None, sat=None, relu=None, loc=None, ip=None):
    """Create the op with result type `dst`, operands `a`, `b` and attributes.

    `rnd`, `sat` and `relu` are attached only when not None; `dstTy` is
    always attached. Each value is passed through unchanged when it already
    is an `Attribute` or when no builder is registered for its kind;
    otherwise the registered builder converts it.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if rnd is not None:
      if not isinstance(rnd, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('FPRoundingModeAttr'):
        rnd = _ods_ir.AttrBuilder.get('FPRoundingModeAttr')(rnd, context=ctx)
      attrs["rnd"] = rnd
    if sat is not None:
      if not isinstance(sat, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('SaturationModeAttr'):
        sat = _ods_ir.AttrBuilder.get('SaturationModeAttr')(sat, context=ctx)
      attrs["sat"] = sat
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    if not isinstance(dstTy, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TypeAttr'):
      dstTy = _ods_ir.AttrBuilder.get('TypeAttr')(dstTy, context=ctx)
    attrs["dstTy"] = dstTy
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=[a, b],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def a(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def b(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return operand 1 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def rnd(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "rnd" key."""
    attr_map = self.operation.attributes
    return attr_map["rnd"]

  @rnd.setter
  def rnd(self, value: _ods_ir.Attribute):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["rnd"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def sat(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "sat" key."""
    attr_map = self.operation.attributes
    return attr_map["sat"]

  @sat.setter
  def sat(self, value: _ods_ir.Attribute):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["sat"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attr_map = self.operation.attributes
    return attr_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dstTy(self) -> _ods_ir.TypeAttr:
    """Return the attribute stored under the "dstTy" key."""
    attr_map = self.operation.attributes
    return attr_map["dstTy"]

  @dstTy.setter
  def dstTy(self, value: _ods_ir.TypeAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["dstTy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f32x2_to_f8x2(dst, a, b, dst_ty, *, rnd=None, sat=None, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build a `ConvertF32x2ToF8x2Op` and return its `result`.

  `dst_ty` is forwarded as the op's `dstTy` argument; all other arguments
  are forwarded under their own names.
  """
  op = ConvertF32x2ToF8x2Op(
      dst=dst,
      a=a,
      b=b,
      dstTy=dst_ty,
      rnd=rnd,
      sat=sat,
      relu=relu,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF32x2ToF16x2Op(_ods_ir.OpView):
  r"""
  Converts two F32 values to packed f16x2 format with the specified
  rounding mode. The `src_hi` and `src_lo` parameters correspond to
  operands `a` and `b` in the PTX ISA, respectively.

  The `random_bits` parameter is required for stochastic rounding and
  provides the [random bits](https://docs.nvidia.com/cuda/parallel-thread-execution/#cvt-rs-rbits-layout-f16) to be used for the conversion.

  The `relu` attribute clamps negative results to 0.

  The `sat` attribute determines saturation behavior.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f32x2.to.f16x2"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, src_hi, src_lo, *, random_bits=None, rnd=None, sat=None, relu=None, loc=None, ip=None):
    """Create the op with result type `dst` and operands `src_hi`, `src_lo`.

    `random_bits` becomes a third operand only when it is not None. Each of
    `rnd`, `sat` and `relu` is attached only when not None, and is passed
    through unchanged when it already is an `Attribute` or when no builder
    is registered for its kind; otherwise the registered builder converts it.
    """
    operand_values = [src_hi, src_lo]
    if random_bits is not None:
      operand_values.append(random_bits)
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if rnd is not None:
      if not isinstance(rnd, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('FPRoundingModeAttr'):
        rnd = _ods_ir.AttrBuilder.get('FPRoundingModeAttr')(rnd, context=ctx)
      attrs["rnd"] = rnd
    if sat is not None:
      if not isinstance(sat, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('SaturationModeAttr'):
        sat = _ods_ir.AttrBuilder.get('SaturationModeAttr')(sat, context=ctx)
      attrs["sat"] = sat
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=operand_values,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def src_hi(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def src_lo(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return operand 1 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def random_bits(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return operand 2, or None when the op has fewer than three operands."""
    operand_list = self.operation.operands
    if len(operand_list) < 3:
      return None
    return operand_list[2]

  @builtins.property
  def rnd(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "rnd" key."""
    attr_map = self.operation.attributes
    return attr_map["rnd"]

  @rnd.setter
  def rnd(self, value: _ods_ir.Attribute):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["rnd"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def sat(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "sat" key."""
    attr_map = self.operation.attributes
    return attr_map["sat"]

  @sat.setter
  def sat(self, value: _ods_ir.Attribute):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["sat"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attr_map = self.operation.attributes
    return attr_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult[_ods_ir.VectorType]:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f32x2_to_f16x2(dst, src_hi, src_lo, *, random_bits=None, rnd=None, sat=None, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build a `ConvertF32x2ToF16x2Op` and return its `result`.

  Every argument is forwarded to the op constructor under its own name.
  """
  op = ConvertF32x2ToF16x2Op(
      dst=dst,
      src_hi=src_hi,
      src_lo=src_lo,
      random_bits=random_bits,
      rnd=rnd,
      sat=sat,
      relu=relu,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF32x4ToF4x4Op(_ods_ir.OpView):
  r"""
  Converts a vector<4xf32> to packed f4x4 format using stochastic rounding
  (.rs) mode with SATFINITE saturation. Randomness is provided by the
  `rbits` parameter. The `dstTy` attribute specifies the target
  floating-point format. The `relu` attribute clamps negative results to 0.

  Note: These operations always use RS rounding mode and SATFINITE
  saturation mode.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f32x4.to.f4x4"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, src, rbits, dstTy, *, relu=None, loc=None, ip=None):
    """Create the op with result type `dst`, operands `src`, `rbits` and attributes.

    `relu` and `dstTy` are passed through unchanged when they already are
    `Attribute` instances or when no builder is registered for their kind;
    otherwise the registered builder converts them.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    # `relu` is only attached when the caller supplies it.
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    # `dstTy` is always attached.
    if not isinstance(dstTy, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TypeAttr'):
      dstTy = _ods_ir.AttrBuilder.get('TypeAttr')(dstTy, context=ctx)
    attrs["dstTy"] = dstTy
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=[src, rbits],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def src(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def rbits(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return operand 1 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attr_map = self.operation.attributes
    return attr_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dstTy(self) -> _ods_ir.TypeAttr:
    """Return the attribute stored under the "dstTy" key."""
    attr_map = self.operation.attributes
    return attr_map["dstTy"]

  @dstTy.setter
  def dstTy(self, value: _ods_ir.TypeAttr):
    # A None assignment is rejected instead of being treated as a removal.
    if value is not None:
      self.operation.attributes["dstTy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f32x4_to_f4x4(dst, src, rbits, dst_ty, *, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build a `ConvertF32x4ToF4x4Op` and return its `result`.

  `dst_ty` is forwarded as the op's `dstTy` argument; all other arguments
  are forwarded under their own names.
  """
  op = ConvertF32x4ToF4x4Op(
      dst=dst, src=src, rbits=rbits, dstTy=dst_ty, relu=relu, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF32x4ToF6x4Op(_ods_ir.OpView):
  r"""
  Converts a vector<4xf32> to packed f6x4 format using
  stochastic rounding (.rs) mode with SATFINITE saturation. Randomness is
  provided by the `rbits` parameter. The `dstTy` attribute specifies the
  target floating-point format. The `relu` attribute clamps negative results to 0.

  Note: These operations always use RS rounding mode and SATFINITE saturation mode.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f32x4.to.f6x4"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, src, rbits, dstTy, *, relu=None, loc=None, ip=None):
    """Build the op from its result entry, operands and attributes.

    Args:
      dst: Sole entry of the `results` list handed to the base constructor.
      src: First operand.
      rbits: Second operand.
      dstTy: Value for the "dstTy" attribute. Used unchanged when it already
        is an `Attribute` or when no 'TypeAttr' builder is registered;
        otherwise it is passed through that builder.
      relu: Optional value for the "relu" attribute, converted with the
        'BoolAttr' builder under the same rule. Not set when None.
      loc: Forwarded to the base constructor; also used to look up the
        context given to attribute builders.
      ip: Forwarded to the base constructor.
    """
    operand_values = [src, rbits]
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    if not isinstance(dstTy, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TypeAttr'):
      dstTy = _ods_ir.AttrBuilder.get('TypeAttr')(dstTy, context=ctx)
    attrs["dstTy"] = dstTy
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=operand_values,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def src(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def rbits(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return operand 1 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attribute_map = self.operation.attributes
    return attribute_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    """Store `value` under the "relu" key; raise ValueError if it is None."""
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dstTy(self) -> _ods_ir.TypeAttr:
    """Return the attribute stored under the "dstTy" key."""
    attribute_map = self.operation.attributes
    return attribute_map["dstTy"]

  @dstTy.setter
  def dstTy(self, value: _ods_ir.TypeAttr):
    """Store `value` under the "dstTy" key; raise ValueError if it is None."""
    if value is not None:
      self.operation.attributes["dstTy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult[_ods_ir.VectorType]:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f32x4_to_f6x4(dst, src, rbits, dst_ty, *, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ConvertF32x4ToF6x4Op` and return its `result`.

  The snake_case `dst_ty` argument is forwarded as the op's `dstTy` keyword;
  every other argument is forwarded under its own name.
  """
  op = ConvertF32x4ToF6x4Op(
      dst=dst,
      src=src,
      rbits=rbits,
      dstTy=dst_ty,
      relu=relu,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertF32x4ToF8x4Op(_ods_ir.OpView):
  r"""
  Converts a vector<4xf32> to packed f8x4 format using
  stochastic rounding (.rs) mode with SATFINITE saturation. Randomness is
  provided by the `rbits` parameter. The `dstTy` attribute specifies the
  target floating-point format. The `relu` attribute clamps negative results to 0.

  Note: These operations always use RS rounding mode and SATFINITE saturation mode.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.f32x4.to.f8x4"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, src, rbits, dstTy, *, relu=None, loc=None, ip=None):
    """Build the op from its result entry, operands and attributes.

    Args:
      dst: Sole entry of the `results` list handed to the base constructor.
      src: First operand.
      rbits: Second operand.
      dstTy: Value for the "dstTy" attribute. Used unchanged when it already
        is an `Attribute` or when no 'TypeAttr' builder is registered;
        otherwise it is passed through that builder.
      relu: Optional value for the "relu" attribute, converted with the
        'BoolAttr' builder under the same rule. Not set when None.
      loc: Forwarded to the base constructor; also used to look up the
        context given to attribute builders.
      ip: Forwarded to the base constructor.
    """
    operand_values = [src, rbits]
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    if not isinstance(dstTy, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TypeAttr'):
      dstTy = _ods_ir.AttrBuilder.get('TypeAttr')(dstTy, context=ctx)
    attrs["dstTy"] = dstTy
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[dst],
        operands=operand_values,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def src(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def rbits(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return operand 1 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attribute_map = self.operation.attributes
    return attribute_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    """Store `value` under the "relu" key; raise ValueError if it is None."""
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dstTy(self) -> _ods_ir.TypeAttr:
    """Return the attribute stored under the "dstTy" key."""
    attribute_map = self.operation.attributes
    return attribute_map["dstTy"]

  @dstTy.setter
  def dstTy(self, value: _ods_ir.TypeAttr):
    """Store `value` under the "dstTy" key; raise ValueError if it is None."""
    if value is not None:
      self.operation.attributes["dstTy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def dst(self) -> _ods_ir.OpResult[_ods_ir.VectorType]:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_f32x4_to_f8x4(dst, src, rbits, dst_ty, *, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ConvertF32x4ToF8x4Op` and return its `result`.

  The snake_case `dst_ty` argument is forwarded as the op's `dstTy` keyword;
  every other argument is forwarded under its own name.
  """
  op = ConvertF32x4ToF8x4Op(
      dst=dst,
      src=src,
      rbits=rbits,
      dstTy=dst_ty,
      relu=relu,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class ConvertFloatToTF32Op(_ods_ir.OpView):
  r"""
  This Op converts the given f32 input to tf32.
  The result `res` is represented as an i32 type.
  The `relu` attribute, when set, lowers to the '.relu' variant of
  the cvt instruction. The `rnd` and `sat` attributes specify
  the rounding and saturation modes respectively.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  """

  OPERATION_NAME = "nvvm.convert.float.to.tf32"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, src, *, rnd=None, sat=None, relu=None, loc=None, ip=None):
    """Build the op from its result entry, operand and optional attributes.

    Each of `rnd`, `sat` and `relu` is skipped when None. Otherwise the value
    is used unchanged if it already is an `Attribute` or if no builder is
    registered under the corresponding name ('FPRoundingModeAttr',
    'SaturationModeAttr', 'BoolAttr'); else it is passed through that builder.

    Args:
      res: Sole entry of the `results` list handed to the base constructor.
      src: The only operand.
      rnd: Optional value for the "rnd" attribute.
      sat: Optional value for the "sat" attribute.
      relu: Optional value for the "relu" attribute.
      loc: Forwarded to the base constructor; also used to look up the
        context given to attribute builders.
      ip: Forwarded to the base constructor.
    """
    operand_values = [src]
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if rnd is not None:
      if not isinstance(rnd, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('FPRoundingModeAttr'):
        rnd = _ods_ir.AttrBuilder.get('FPRoundingModeAttr')(rnd, context=ctx)
      attrs["rnd"] = rnd
    if sat is not None:
      if not isinstance(sat, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('SaturationModeAttr'):
        sat = _ods_ir.AttrBuilder.get('SaturationModeAttr')(sat, context=ctx)
      attrs["sat"] = sat
    if relu is not None:
      if not isinstance(relu, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relu = _ods_ir.AttrBuilder.get('BoolAttr')(relu, context=ctx)
      attrs["relu"] = relu
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=operand_values,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def src(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def rnd(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "rnd" key."""
    attribute_map = self.operation.attributes
    return attribute_map["rnd"]

  @rnd.setter
  def rnd(self, value: _ods_ir.Attribute):
    """Store `value` under the "rnd" key; raise ValueError if it is None."""
    if value is not None:
      self.operation.attributes["rnd"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def sat(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "sat" key."""
    attribute_map = self.operation.attributes
    return attribute_map["sat"]

  @sat.setter
  def sat(self, value: _ods_ir.Attribute):
    """Store `value` under the "sat" key; raise ValueError if it is None."""
    if value is not None:
      self.operation.attributes["sat"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def relu(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "relu" key."""
    attribute_map = self.operation.attributes
    return attribute_map["relu"]

  @relu.setter
  def relu(self, value: _ods_ir.BoolAttr):
    """Store `value` under the "relu" key; raise ValueError if it is None."""
    if value is not None:
      self.operation.attributes["relu"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """Return result 0 of the underlying operation."""
    result_list = self.operation.results
    return result_list[0]

def convert_float_to_tf32(res, src, *, rnd=None, sat=None, relu=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ConvertFloatToTF32Op` and return its `result`.

  All arguments are forwarded to the op constructor under the same names.
  """
  op = ConvertFloatToTF32Op(
      res=res,
      src=src,
      rnd=rnd,
      sat=sat,
      relu=relu,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class CpAsyncBulkCommitGroupOp(_ods_ir.OpView):
  r"""
  This Op commits all prior initiated but uncommitted cp.async.bulk
  instructions into a cp.async.bulk-group.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-commit-group)
  """

  OPERATION_NAME = "nvvm.cp.async.bulk.commit.group"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, loc=None, ip=None):
    """Build the op with no operands, no attributes and no results.

    Args:
      loc: Forwarded to the base constructor.
      ip: Forwarded to the base constructor.
    """
    # The looked-up context is not used because there are no attributes to
    # build; the call is kept so that any error it may raise still surfaces
    # before the base constructor runs.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

def cp_async_bulk_commit_group(*, loc=None, ip=None) -> CpAsyncBulkCommitGroupOp:
  """Construct and return a `CpAsyncBulkCommitGroupOp`, forwarding `loc` and `ip`."""
  op = CpAsyncBulkCommitGroupOp(loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncBulkGlobalToSharedClusterOp(_ods_ir.OpView):
  r"""
  Initiates an asynchronous copy operation from global memory to shared
  memory or shared_cluster memory.

  The `multicastMask` operand is optional and can be used only when the
  destination is shared::cluster memory. When it is present, this Op copies
  data from global memory to shared memory of multiple CTAs in the cluster.
  Operand `multicastMask` specifies the destination CTAs in the cluster such
  that each bit position in the 16-bit `multicastMask` operand corresponds to
  the `nvvm.read.ptx.sreg.ctaid` of the destination CTA.

  The `l2CacheHint` operand is optional, and it is used to specify cache
  eviction policy that may be used during the memory access.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk)
  """

  OPERATION_NAME = "nvvm.cp.async.bulk.shared.cluster.global"

  _ODS_OPERAND_SEGMENTS = [1, 1, 1, 1, 0, 0]

  _ODS_REGIONS = (0, True)

  def __init__(self, dstMem, srcMem, mbar, size, *, multicastMask=None, l2CacheHint=None, loc=None, ip=None):
    """Build the op from four required and two optional operands.

    The operand list always has six entries, in the order `dstMem`, `srcMem`,
    `mbar`, `size`, `multicastMask`, `l2CacheHint`; the optional ones are
    placed in the list even when they are None.

    Args:
      dstMem: Operand for segment 0.
      srcMem: Operand for segment 1.
      mbar: Operand for segment 2.
      size: Operand for segment 3.
      multicastMask: Optional operand for segment 4.
      l2CacheHint: Optional operand for segment 5.
      loc: Forwarded to the base constructor.
      ip: Forwarded to the base constructor.
    """
    operand_values = [dstMem, srcMem, mbar, size, multicastMask, l2CacheHint]
    # Result unused (no attributes to build); the lookup itself is kept.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=operand_values,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def dstMem(self) -> _ods_ir.Value:
    """Return the single operand of segment 0."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 0)
    return segment[0]

  @builtins.property
  def srcMem(self) -> _ods_ir.Value:
    """Return the single operand of segment 1."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 1)
    return segment[0]

  @builtins.property
  def mbar(self) -> _ods_ir.Value:
    """Return the single operand of segment 2."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 2)
    return segment[0]

  @builtins.property
  def size(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the single operand of segment 3."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 3)
    return segment[0]

  @builtins.property
  def multicastMask(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 4, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 4)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def l2CacheHint(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 5, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 5)
    if len(segment) > 0:
      return segment[0]
    return None

def cp_async_bulk_shared_cluster_global(dst_mem, src_mem, mbar, size, *, multicast_mask=None, l2_cache_hint=None, loc=None, ip=None) -> CpAsyncBulkGlobalToSharedClusterOp:
  """Construct and return a `CpAsyncBulkGlobalToSharedClusterOp`.

  Snake_case arguments are forwarded to the op's camelCase keywords
  (`dst_mem` -> `dstMem`, `src_mem` -> `srcMem`,
  `multicast_mask` -> `multicastMask`, `l2_cache_hint` -> `l2CacheHint`).
  """
  op = CpAsyncBulkGlobalToSharedClusterOp(
      dstMem=dst_mem,
      srcMem=src_mem,
      mbar=mbar,
      size=size,
      multicastMask=multicast_mask,
      l2CacheHint=l2_cache_hint,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncBulkPrefetchOp(_ods_ir.OpView):
  r"""
  Initiates an asynchronous prefetch of data from the location
  specified by `srcMem` to the L2 cache.

  The `l2CacheHint` operand is optional, and it is used to specify cache
  eviction policy that may be used during the memory access.

  Example:
  ```mlir
    nvvm.cp.async.bulk.prefetch %src, %size : !llvm.ptr<1>

    // with l2_cache_hint
    nvvm.cp.async.bulk.prefetch %src, %size l2_cache_hint = %ch : !llvm.ptr<1>
  ```

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-prefetch)
  """

  OPERATION_NAME = "nvvm.cp.async.bulk.prefetch"

  _ODS_REGIONS = (0, True)

  def __init__(self, srcMem, size, *, l2CacheHint=None, loc=None, ip=None):
    """Build the op from two required operands and one optional operand.

    Args:
      srcMem: Operand 0.
      size: Operand 1.
      l2CacheHint: Optional operand 2; appended only when it is not None.
      loc: Forwarded to the base constructor.
      ip: Forwarded to the base constructor.
    """
    operand_values = [srcMem, size]
    if l2CacheHint is not None:
      operand_values.append(l2CacheHint)
    # Result unused (no attributes to build); the lookup itself is kept.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=operand_values,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def srcMem(self) -> _ods_ir.Value:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def size(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return operand 1 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def l2CacheHint(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return operand 2, or None when the op has fewer than three operands."""
    if len(self.operation.operands) < 3:
      return None
    return self.operation.operands[2]

def cp_async_bulk_prefetch(src_mem, size, *, l2_cache_hint=None, loc=None, ip=None) -> CpAsyncBulkPrefetchOp:
  """Construct and return a `CpAsyncBulkPrefetchOp`.

  `src_mem` and `l2_cache_hint` are forwarded as the op's `srcMem` and
  `l2CacheHint` keywords; the remaining arguments keep their names.
  """
  op = CpAsyncBulkPrefetchOp(
      srcMem=src_mem,
      size=size,
      l2CacheHint=l2_cache_hint,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncBulkSharedCTAToGlobalOp(_ods_ir.OpView):
  r"""
  Initiates an asynchronous copy operation from Shared CTA memory to
  global memory. The 32-bit operand `size` specifies the amount of
  memory to be copied, in terms of number of bytes. `size` must be a
  multiple of 16. The `l2CacheHint` operand is optional, and it is used
  to specify cache eviction policy that may be used during the memory
  access. The `byteMask` operand is optional. The i-th bit in the 16-bit
  wide `byteMask` specifies whether the i-th byte of each 16-byte wide
  chunk of source data is copied to the destination. If the bit is set,
  the byte is copied.

  Example:
  ```mlir
    nvvm.cp.async.bulk.global.shared.cta %dst, %src, %size
        : !llvm.ptr<1>, !llvm.ptr<3>

    // with l2_cache_hint
    nvvm.cp.async.bulk.global.shared.cta %dst, %src, %size l2_cache_hint = %ch
        : !llvm.ptr<1>, !llvm.ptr<3>

    // with byte_mask
    nvvm.cp.async.bulk.global.shared.cta %dst, %src, %size byte_mask = %mask
        : !llvm.ptr<1>, !llvm.ptr<3>

    // with both l2_cache_hint and byte_mask
    nvvm.cp.async.bulk.global.shared.cta %dst, %src, %size l2_cache_hint = %ch byte_mask = %mask
        : !llvm.ptr<1>, !llvm.ptr<3>
  ```

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk)
  """

  OPERATION_NAME = "nvvm.cp.async.bulk.global.shared.cta"

  _ODS_OPERAND_SEGMENTS = [1, 1, 1, 0, 0]

  _ODS_REGIONS = (0, True)

  def __init__(self, dstMem, srcMem, size, *, l2CacheHint=None, byteMask=None, loc=None, ip=None):
    """Build the op from three required and two optional operands.

    The operand list always has five entries, in the order `dstMem`,
    `srcMem`, `size`, `l2CacheHint`, `byteMask`; the optional ones are placed
    in the list even when they are None.

    Args:
      dstMem: Operand for segment 0.
      srcMem: Operand for segment 1.
      size: Operand for segment 2.
      l2CacheHint: Optional operand for segment 3.
      byteMask: Optional operand for segment 4.
      loc: Forwarded to the base constructor.
      ip: Forwarded to the base constructor.
    """
    operand_values = [dstMem, srcMem, size, l2CacheHint, byteMask]
    # Result unused (no attributes to build); the lookup itself is kept.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=operand_values,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def dstMem(self) -> _ods_ir.Value:
    """Return the single operand of segment 0."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 0)
    return segment[0]

  @builtins.property
  def srcMem(self) -> _ods_ir.Value:
    """Return the single operand of segment 1."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 1)
    return segment[0]

  @builtins.property
  def size(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the single operand of segment 2."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 2)
    return segment[0]

  @builtins.property
  def l2CacheHint(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 3, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 3)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def byteMask(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 4, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 4)
    if len(segment) > 0:
      return segment[0]
    return None

def cp_async_bulk_global_shared_cta(dst_mem, src_mem, size, *, l2_cache_hint=None, byte_mask=None, loc=None, ip=None) -> CpAsyncBulkSharedCTAToGlobalOp:
  """Construct and return a `CpAsyncBulkSharedCTAToGlobalOp`.

  Snake_case arguments are forwarded to the op's camelCase keywords
  (`dst_mem` -> `dstMem`, `src_mem` -> `srcMem`,
  `l2_cache_hint` -> `l2CacheHint`, `byte_mask` -> `byteMask`).
  """
  op = CpAsyncBulkSharedCTAToGlobalOp(
      dstMem=dst_mem,
      srcMem=src_mem,
      size=size,
      l2CacheHint=l2_cache_hint,
      byteMask=byte_mask,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncBulkSharedCTAToSharedClusterOp(_ods_ir.OpView):
  r"""
  Initiates an asynchronous copy operation from Shared CTA memory to Shared
  cluster memory.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk)
  """

  OPERATION_NAME = "nvvm.cp.async.bulk.shared.cluster.shared.cta"

  _ODS_REGIONS = (0, True)

  def __init__(self, dstMem, srcMem, mbar, size, *, loc=None, ip=None):
    """Build the op from its four operands.

    Args:
      dstMem: Operand 0.
      srcMem: Operand 1.
      mbar: Operand 2.
      size: Operand 3.
      loc: Forwarded to the base constructor.
      ip: Forwarded to the base constructor.
    """
    operand_values = [dstMem, srcMem, mbar, size]
    # Result unused (no attributes to build); the lookup itself is kept.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=operand_values,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def dstMem(self) -> _ods_ir.Value:
    """Return operand 0 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def srcMem(self) -> _ods_ir.Value:
    """Return operand 1 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def mbar(self) -> _ods_ir.Value:
    """Return operand 2 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[2]

  @builtins.property
  def size(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return operand 3 of the underlying operation."""
    operand_list = self.operation.operands
    return operand_list[3]

def cp_async_bulk_shared_cluster_shared_cta(dst_mem, src_mem, mbar, size, *, loc=None, ip=None) -> CpAsyncBulkSharedCTAToSharedClusterOp:
  """Construct and return a `CpAsyncBulkSharedCTAToSharedClusterOp`.

  `dst_mem` and `src_mem` are forwarded as the op's `dstMem` and `srcMem`
  keywords; the remaining arguments keep their names.
  """
  op = CpAsyncBulkSharedCTAToSharedClusterOp(
      dstMem=dst_mem,
      srcMem=src_mem,
      mbar=mbar,
      size=size,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncBulkTensorGlobalToSharedClusterOp(_ods_ir.OpView):
  r"""
  Initiates an asynchronous copy operation on the tensor data from global
  memory to shared::cluster (or) shared::cta memory. This Op supports all
  the load modes specified in `TMALoadMode`.

  The `multicastMask` operand is optional. When it is present, the Op copies
  data from global memory to shared memory of multiple CTAs in the cluster.
  Operand `multicastMask` specifies the destination CTAs in the cluster such
  that each bit position in the 16-bit `multicastMask` operand corresponds to
  the `nvvm.read.ptx.sreg.ctaid` of the destination CTA.

  The `l2CacheHint` operand is optional, and it is used to specify cache
  eviction policy that may be used during the memory access.

  When the `isCTAOnly` attribute is set to true, the destination is
  shared::cta only. Hence, `multicastMask` and `CTAGroup` are not applicable
  when `isCTAOnly` is true.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor)
  """

  OPERATION_NAME = "nvvm.cp.async.bulk.tensor.shared.cluster.global"

  _ODS_OPERAND_SEGMENTS = [1, 1, -1, 1, -1, 0, 0, 0]

  _ODS_REGIONS = (0, True)

  def __init__(self, dstMem, tmaDescriptor, coordinates, mbar, im2colOffsets, *, multicastMask=None, l2CacheHint=None, mode=None, isCTAOnly=None, group=None, predicate=None, loc=None, ip=None):
    """Build the op from its operand segments and optional attributes.

    The operand list has eight entries, in the order `dstMem`,
    `tmaDescriptor`, `coordinates`, `mbar`, `im2colOffsets`, `multicastMask`,
    `l2CacheHint`, `predicate`. `coordinates` and `im2colOffsets` are first
    passed through `_get_op_results_or_values`; optional operands are placed
    in the list even when they are None.

    Each of `mode`, `isCTAOnly` and `group` is skipped when None. Otherwise
    the value is used unchanged if it already is an `Attribute` or if no
    builder is registered under the corresponding name ('TMALoadModeAttr',
    'BoolAttr', 'CTAGroupKindAttr'); else it is passed through that builder.

    Args:
      dstMem: Operand for segment 0.
      tmaDescriptor: Operand for segment 1.
      coordinates: Operands for segment 2.
      mbar: Operand for segment 3.
      im2colOffsets: Operands for segment 4.
      multicastMask: Optional operand for segment 5.
      l2CacheHint: Optional operand for segment 6.
      mode: Optional value for the "mode" attribute.
      isCTAOnly: Optional value for the "isCTAOnly" attribute.
      group: Optional value for the "group" attribute.
      predicate: Optional operand for segment 7.
      loc: Forwarded to the base constructor; also used to look up the
        context given to attribute builders.
      ip: Forwarded to the base constructor.
    """
    operand_values = [
        dstMem,
        tmaDescriptor,
        _get_op_results_or_values(coordinates),
        mbar,
        _get_op_results_or_values(im2colOffsets),
        multicastMask,
        l2CacheHint,
        predicate,
    ]
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if mode is not None:
      if not isinstance(mode, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TMALoadModeAttr'):
        mode = _ods_ir.AttrBuilder.get('TMALoadModeAttr')(mode, context=ctx)
      attrs["mode"] = mode
    if isCTAOnly is not None:
      if not isinstance(isCTAOnly, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        isCTAOnly = _ods_ir.AttrBuilder.get('BoolAttr')(isCTAOnly, context=ctx)
      attrs["isCTAOnly"] = isCTAOnly
    if group is not None:
      if not isinstance(group, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('CTAGroupKindAttr'):
        group = _ods_ir.AttrBuilder.get('CTAGroupKindAttr')(group, context=ctx)
      attrs["group"] = group
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[],
        operands=operand_values,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def dstMem(self) -> _ods_ir.Value:
    """Return the single operand of segment 0."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 0)
    return segment[0]

  @builtins.property
  def tmaDescriptor(self) -> _ods_ir.Value:
    """Return the single operand of segment 1."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 1)
    return segment[0]

  @builtins.property
  def coordinates(self) -> _ods_ir.OpOperandList:
    """Return all operands of segment 2."""
    op = self.operation
    return _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 2)

  @builtins.property
  def mbar(self) -> _ods_ir.Value:
    """Return the single operand of segment 3."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 3)
    return segment[0]

  @builtins.property
  def im2colOffsets(self) -> _ods_ir.OpOperandList:
    """Return all operands of segment 4."""
    op = self.operation
    return _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 4)

  @builtins.property
  def multicastMask(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 5, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 5)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def l2CacheHint(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 6, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 6)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def predicate(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 7, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 7)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def mode(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "mode" key."""
    attribute_map = self.operation.attributes
    return attribute_map["mode"]

  @mode.setter
  def mode(self, value: _ods_ir.Attribute):
    """Store `value` under the "mode" key; raise ValueError if it is None."""
    if value is not None:
      self.operation.attributes["mode"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def isCTAOnly(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "isCTAOnly" key."""
    attribute_map = self.operation.attributes
    return attribute_map["isCTAOnly"]

  @isCTAOnly.setter
  def isCTAOnly(self, value: _ods_ir.BoolAttr):
    """Store `value` under the "isCTAOnly" key; raise ValueError if it is None."""
    if value is not None:
      self.operation.attributes["isCTAOnly"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def group(self) -> _Optional[_ods_ir.Attribute]:
    """Return the "group" attribute, or None when it is not present."""
    if "group" in self.operation.attributes:
      return self.operation.attributes["group"]
    return None

  @group.setter
  def group(self, value: _Optional[_ods_ir.Attribute]):
    """Set the "group" attribute, or remove it when `value` is None."""
    if value is None:
      # Clearing an attribute that is already absent is a no-op.
      if "group" in self.operation.attributes:
        del self.operation.attributes["group"]
    else:
      self.operation.attributes["group"] = value

  @group.deleter
  def group(self):
    """Delete the "group" attribute from the operation."""
    del self.operation.attributes["group"]

def cp_async_bulk_tensor_shared_cluster_global(dst_mem, tma_descriptor, coordinates, mbar, im2col_offsets, *, multicast_mask=None, l2_cache_hint=None, mode=None, is_cta_only=None, group=None, predicate=None, loc=None, ip=None) -> CpAsyncBulkTensorGlobalToSharedClusterOp:
  """Construct and return a `CpAsyncBulkTensorGlobalToSharedClusterOp`.

  Snake_case arguments are forwarded to the op's camelCase keywords
  (`dst_mem` -> `dstMem`, `tma_descriptor` -> `tmaDescriptor`,
  `im2col_offsets` -> `im2colOffsets`, `multicast_mask` -> `multicastMask`,
  `l2_cache_hint` -> `l2CacheHint`, `is_cta_only` -> `isCTAOnly`); the
  remaining arguments keep their names.
  """
  op = CpAsyncBulkTensorGlobalToSharedClusterOp(
      dstMem=dst_mem,
      tmaDescriptor=tma_descriptor,
      coordinates=coordinates,
      mbar=mbar,
      im2colOffsets=im2col_offsets,
      multicastMask=multicast_mask,
      l2CacheHint=l2_cache_hint,
      mode=mode,
      isCTAOnly=is_cta_only,
      group=group,
      predicate=predicate,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncBulkTensorPrefetchOp(_ods_ir.OpView):
  r"""
  Initiates an asynchronous prefetch operation on the tensor data from global
  memory to L2 cache. This Op supports all the load modes specified in
  `TMALoadMode`.

  The `l2CacheHint` operand is optional, and it is used to specify cache
  eviction policy that may be used during the memory access.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-prefetch-tensor)
  """

  OPERATION_NAME = "nvvm.cp.async.bulk.tensor.prefetch"

  _ODS_OPERAND_SEGMENTS = [1, -1, -1, 0]

  _ODS_REGIONS = (0, True)

  def __init__(self, tmaDescriptor, coordinates, im2colOffsets, *, mode=None, l2CacheHint=None, loc=None, ip=None):
    """Build the op from its operand segments and optional `mode` attribute.

    The operand list has four entries, in the order `tmaDescriptor`,
    `coordinates`, `im2colOffsets`, `l2CacheHint`. `coordinates` and
    `im2colOffsets` are first passed through `_get_op_results_or_values`;
    `l2CacheHint` is placed in the list even when it is None.

    Args:
      tmaDescriptor: Operand for segment 0.
      coordinates: Operands for segment 1.
      im2colOffsets: Operands for segment 2.
      mode: Optional value for the "mode" attribute. Skipped when None; used
        unchanged when it already is an `Attribute` or when no
        'TMALoadModeAttr' builder is registered; otherwise passed through
        that builder.
      l2CacheHint: Optional operand for segment 3.
      loc: Forwarded to the base constructor; also used to look up the
        context given to the attribute builder.
      ip: Forwarded to the base constructor.
    """
    operand_values = [
        tmaDescriptor,
        _get_op_results_or_values(coordinates),
        _get_op_results_or_values(im2colOffsets),
        l2CacheHint,
    ]
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if mode is not None:
      if not isinstance(mode, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TMALoadModeAttr'):
        mode = _ods_ir.AttrBuilder.get('TMALoadModeAttr')(mode, context=ctx)
      attrs["mode"] = mode
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[],
        operands=operand_values,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def tmaDescriptor(self) -> _ods_ir.Value:
    """Return the single operand of segment 0."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 0)
    return segment[0]

  @builtins.property
  def coordinates(self) -> _ods_ir.OpOperandList:
    """Return all operands of segment 1."""
    op = self.operation
    return _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 1)

  @builtins.property
  def im2colOffsets(self) -> _ods_ir.OpOperandList:
    """Return all operands of segment 2."""
    op = self.operation
    return _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 2)

  @builtins.property
  def l2CacheHint(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 3, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 3)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def mode(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "mode" key."""
    attribute_map = self.operation.attributes
    return attribute_map["mode"]

  @mode.setter
  def mode(self, value: _ods_ir.Attribute):
    """Store `value` under the "mode" key; raise ValueError if it is None."""
    if value is not None:
      self.operation.attributes["mode"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def cp_async_bulk_tensor_prefetch(tma_descriptor, coordinates, im2col_offsets, *, mode=None, l2_cache_hint=None, loc=None, ip=None) -> CpAsyncBulkTensorPrefetchOp:
  """Construct and return a `CpAsyncBulkTensorPrefetchOp`.

  Snake_case arguments are forwarded to the op's camelCase keywords
  (`tma_descriptor` -> `tmaDescriptor`, `im2col_offsets` -> `im2colOffsets`,
  `l2_cache_hint` -> `l2CacheHint`); the remaining arguments keep their names.
  """
  op = CpAsyncBulkTensorPrefetchOp(
      tmaDescriptor=tma_descriptor,
      coordinates=coordinates,
      im2colOffsets=im2col_offsets,
      mode=mode,
      l2CacheHint=l2_cache_hint,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncBulkTensorReduceOp(_ods_ir.OpView):
  r"""
  Initiates an asynchronous reduction operation of tensor data in
  global memory with tensor data in shared memory.

  The `mode` attribute indicates whether the copy mode is tile or im2col.
  The `redKind` attribute specifies the reduction operation applied.
  The supported reduction operations are:
  {add, min, max, inc, dec, and, or, xor}

  The `l2CacheHint` operand is optional, and it is used to specify cache
  eviction policy that may be used during the memory access.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-reduce-async-bulk-tensor)
  """

  OPERATION_NAME = "nvvm.cp.async.bulk.tensor.reduce"

  _ODS_OPERAND_SEGMENTS = [1, 1, -1, 0]

  _ODS_REGIONS = (0, True)

  def __init__(self, tmaDescriptor, srcMem, redKind, coordinates, *, mode=None, l2CacheHint=None, loc=None, ip=None):
    """Build the op from its operands and attributes.

    `redKind` is always set; `mode` is only set when it is not None. A value
    that is not already an `Attribute` is converted through the registered
    `AttrBuilder` when one exists for that attribute kind. `loc` and `ip`
    are forwarded unchanged to `OpView.__init__`.
    """
    # One entry per operand segment, in declaration order; a None
    # `l2CacheHint` is forwarded as-is in its slot.
    operands = [
        tmaDescriptor,
        srcMem,
        _get_op_results_or_values(coordinates),
        l2CacheHint,
    ]
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if not isinstance(redKind, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TMAReduxKindAttr'):
      attributes["redKind"] = _ods_ir.AttrBuilder.get('TMAReduxKindAttr')(redKind, context=_ods_context)
    else:
      attributes["redKind"] = redKind
    if mode is not None:
      if not isinstance(mode, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TMAStoreModeAttr'):
        attributes["mode"] = _ods_ir.AttrBuilder.get('TMAStoreModeAttr')(mode, context=_ods_context)
      else:
        attributes["mode"] = mode
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def tmaDescriptor(self) -> _ods_ir.Value:
    """Return the first operand of operand segment 0."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 0)
    return segment[0]

  @builtins.property
  def srcMem(self) -> _ods_ir.Value:
    """Return the first operand of operand segment 1."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 1)
    return segment[0]

  @builtins.property
  def coordinates(self) -> _ods_ir.OpOperandList:
    """Return the operands of operand segment 2."""
    op = self.operation
    return _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 2)

  @builtins.property
  def l2CacheHint(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 3, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 3)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def redKind(self) -> _ods_ir.Attribute:
    """Return the value stored under the `redKind` attribute."""
    attrs = self.operation.attributes
    return attrs["redKind"]

  @redKind.setter
  def redKind(self, value: _ods_ir.Attribute):
    """Store `value` as the `redKind` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["redKind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def mode(self) -> _ods_ir.Attribute:
    """Return the value stored under the `mode` attribute."""
    attrs = self.operation.attributes
    return attrs["mode"]

  @mode.setter
  def mode(self, value: _ods_ir.Attribute):
    """Store `value` as the `mode` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["mode"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def cp_async_bulk_tensor_reduce(
    tma_descriptor,
    src_mem,
    red_kind,
    coordinates,
    *,
    mode=None,
    l2_cache_hint=None,
    loc=None,
    ip=None,
) -> CpAsyncBulkTensorReduceOp:
  """Construct and return a `CpAsyncBulkTensorReduceOp`.

  The snake_case arguments are forwarded to the op constructor under its
  camelCase keyword names; `loc` and `ip` are passed through unchanged.
  """
  op = CpAsyncBulkTensorReduceOp(
      tmaDescriptor=tma_descriptor,
      srcMem=src_mem,
      redKind=red_kind,
      coordinates=coordinates,
      mode=mode,
      l2CacheHint=l2_cache_hint,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncBulkTensorSharedCTAToGlobalOp(_ods_ir.OpView):
  r"""
  Initiates an asynchronous copy of the tensor data from shared::cta
  memory to global memory. This Op supports all the store modes specified in
  `TMAStoreMode`.

  The `l2CacheHint` operand is optional, and it is used to specify cache
  eviction policy that may be used during the memory access.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-cp-async-bulk-tensor)
  """

  OPERATION_NAME = "nvvm.cp.async.bulk.tensor.global.shared.cta"

  _ODS_OPERAND_SEGMENTS = [1, 1, -1, 0, 0]

  _ODS_REGIONS = (0, True)

  def __init__(self, tmaDescriptor, srcMem, coordinates, *, l2CacheHint=None, mode=None, predicate=None, loc=None, ip=None):
    """Build the op from its operands and the optional `mode` attribute.

    `mode` is only set when it is not None; a value that is not already an
    `Attribute` is converted through the registered `AttrBuilder` when one
    exists. `loc` and `ip` are forwarded unchanged to `OpView.__init__`.
    """
    # One entry per operand segment, in declaration order; None values for
    # `l2CacheHint` / `predicate` are forwarded as-is in their slots.
    operands = [
        tmaDescriptor,
        srcMem,
        _get_op_results_or_values(coordinates),
        l2CacheHint,
        predicate,
    ]
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if mode is not None:
      if not isinstance(mode, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('TMAStoreModeAttr'):
        attributes["mode"] = _ods_ir.AttrBuilder.get('TMAStoreModeAttr')(mode, context=_ods_context)
      else:
        attributes["mode"] = mode
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def tmaDescriptor(self) -> _ods_ir.Value:
    """Return the first operand of operand segment 0."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 0)
    return segment[0]

  @builtins.property
  def srcMem(self) -> _ods_ir.Value:
    """Return the first operand of operand segment 1."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 1)
    return segment[0]

  @builtins.property
  def coordinates(self) -> _ods_ir.OpOperandList:
    """Return the operands of operand segment 2."""
    op = self.operation
    return _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 2)

  @builtins.property
  def l2CacheHint(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 3, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 3)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def predicate(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 4, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 4)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def mode(self) -> _ods_ir.Attribute:
    """Return the value stored under the `mode` attribute."""
    attrs = self.operation.attributes
    return attrs["mode"]

  @mode.setter
  def mode(self, value: _ods_ir.Attribute):
    """Store `value` as the `mode` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["mode"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def cp_async_bulk_tensor_global_shared_cta(
    tma_descriptor,
    src_mem,
    coordinates,
    *,
    l2_cache_hint=None,
    mode=None,
    predicate=None,
    loc=None,
    ip=None,
) -> CpAsyncBulkTensorSharedCTAToGlobalOp:
  """Construct and return a `CpAsyncBulkTensorSharedCTAToGlobalOp`.

  The snake_case arguments are forwarded to the op constructor under its
  camelCase keyword names; `loc` and `ip` are passed through unchanged.
  """
  op = CpAsyncBulkTensorSharedCTAToGlobalOp(
      tmaDescriptor=tma_descriptor,
      srcMem=src_mem,
      coordinates=coordinates,
      l2CacheHint=l2_cache_hint,
      mode=mode,
      predicate=predicate,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncBulkWaitGroupOp(_ods_ir.OpView):
  r"""
  Op waits for completion of the most recent bulk async-groups.

  The `$group` value specifies that waiting has to be done until `$group` or
  fewer of the most recent bulk async-groups are pending. If `$group` is 0,
  the op waits until all the most recent bulk async-groups have completed.

  The `$read` flag indicates that the waiting has to be done until all the
  bulk async operations in the specified bulk async-group have completed
  reading from their source locations.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-wait-group)
  """

  OPERATION_NAME = "nvvm.cp.async.bulk.wait_group"

  _ODS_REGIONS = (0, True)

  def __init__(self, group, *, read=None, loc=None, ip=None):
    """Build the op with its `group` attribute and optional `read` unit attribute.

    A `group` that is not already an `Attribute` is converted through the
    registered `I32Attr` builder when one exists. `read` is attached as a
    `UnitAttr` only when truthy. `loc` and `ip` are forwarded unchanged to
    `OpView.__init__`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if not isinstance(group, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('I32Attr'):
      attributes["group"] = _ods_ir.AttrBuilder.get('I32Attr')(group, context=_ods_context)
    else:
      attributes["group"] = group
    if read:
      # The context is looked up again here, exactly as the generator emits it.
      attributes["read"] = _ods_ir.UnitAttr.get(_ods_get_default_loc_context(loc))
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def group(self) -> _ods_ir.IntegerAttr:
    """Return the value stored under the `group` attribute."""
    attrs = self.operation.attributes
    return attrs["group"]

  @group.setter
  def group(self, value: _ods_ir.IntegerAttr):
    """Store `value` as the `group` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["group"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def read(self) -> bool:
    """Return True when the op carries a `read` attribute."""
    attrs = self.operation.attributes
    return "read" in attrs

  @read.setter
  def read(self, value):
    """Attach a `read` unit attribute when `value` is truthy, otherwise remove it if present."""
    attrs = self.operation.attributes
    if value:
      attrs["read"] = _ods_ir.UnitAttr.get()
      return
    if "read" in attrs:
      del attrs["read"]

  @read.deleter
  def read(self):
    """Delete the `read` attribute unconditionally (no presence check is made)."""
    attrs = self.operation.attributes
    del attrs["read"]

def cp_async_bulk_wait_group(
    group,
    *,
    read=None,
    loc=None,
    ip=None,
) -> CpAsyncBulkWaitGroupOp:
  """Construct and return a `CpAsyncBulkWaitGroupOp`; all arguments are forwarded by keyword."""
  op = CpAsyncBulkWaitGroupOp(group=group, read=read, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncCommitGroupOp(_ods_ir.OpView):
  r"""Python view of the `nvvm.cp.async.commit.group` operation.

  The builder passes no operands, attributes or results.
  """

  OPERATION_NAME = "nvvm.cp.async.commit.group"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, loc=None, ip=None):
    """Build the op; `loc` and `ip` are forwarded unchanged to `OpView.__init__`."""
    # The returned context is unused; the lookup is retained so that
    # construction performs the same calls as the generated code.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

def cp_async_commit_group(
    *,
    loc=None,
    ip=None,
) -> CpAsyncCommitGroupOp:
  """Construct and return a `CpAsyncCommitGroupOp`."""
  op = CpAsyncCommitGroupOp(loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncMBarrierArriveOp(_ods_ir.OpView):
  r"""
  The `cp.async.mbarrier.arrive` Op makes the *mbarrier object* track
  all prior cp.async operations initiated by the executing thread.
  The `addr` operand specifies the address of the *mbarrier object*
  in generic or shared::cta address space. When it is generic, the
  underlying memory should fall within the shared::cta space;
  otherwise the behavior is undefined. The `noinc` attr impacts
  how the mbarrier's state is updated.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
  """

  OPERATION_NAME = "nvvm.cp.async.mbarrier.arrive"

  _ODS_REGIONS = (0, True)

  def __init__(self, addr, *, noinc=None, loc=None, ip=None):
    """Build the op with the `addr` operand and the optional `noinc` attribute.

    `noinc` is only set when it is not None; a value that is not already an
    `Attribute` is converted through the registered `I1Attr` builder when
    one exists. `loc` and `ip` are forwarded unchanged to `OpView.__init__`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if noinc is not None:
      if not isinstance(noinc, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('I1Attr'):
        attributes["noinc"] = _ods_ir.AttrBuilder.get('I1Attr')(noinc, context=_ods_context)
      else:
        attributes["noinc"] = noinc
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=[addr],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Return operand 0 of the op."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def noinc(self) -> _ods_ir.BoolAttr:
    """Return the value stored under the `noinc` attribute."""
    attrs = self.operation.attributes
    return attrs["noinc"]

  @noinc.setter
  def noinc(self, value: _ods_ir.BoolAttr):
    """Store `value` as the `noinc` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["noinc"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def cp_async_mbarrier_arrive(
    addr,
    *,
    noinc=None,
    loc=None,
    ip=None,
) -> CpAsyncMBarrierArriveOp:
  """Construct and return a `CpAsyncMBarrierArriveOp`; all arguments are forwarded by keyword."""
  op = CpAsyncMBarrierArriveOp(addr=addr, noinc=noinc, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncOp(_ods_ir.OpView):
  r"""Python view of the `nvvm.cp.async.shared.global` operation.

  Operands are `dst`, `src` and an optional trailing `cpSize`; `size` and
  `modifier` are attributes that the builder always sets.
  """

  OPERATION_NAME = "nvvm.cp.async.shared.global"

  _ODS_REGIONS = (0, True)

  def __init__(self, dst, src, size, modifier, *, cpSize=None, loc=None, ip=None):
    """Build the op from its operands and the `size` / `modifier` attributes.

    `cpSize` is appended as a third operand only when it is not None. Each
    attribute value that is not already an `Attribute` is converted through
    the registered `AttrBuilder` for its kind when one exists. `loc` and
    `ip` are forwarded unchanged to `OpView.__init__`.
    """
    operands = [dst, src]
    if cpSize is not None:
      operands.append(cpSize)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    # (attribute name, caller-supplied value, AttrBuilder kind), in declaration order.
    attr_specs = (
        ("size", size, 'I32Attr'),
        ("modifier", modifier, 'LoadCacheModifierAttr'),
    )
    for attr_name, attr_value, builder_kind in attr_specs:
      if isinstance(attr_value, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains(builder_kind):
        attributes[attr_name] = attr_value
      else:
        attributes[attr_name] = _ods_ir.AttrBuilder.get(builder_kind)(attr_value, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def dst(self) -> _ods_ir.Value:
    """Return operand 0 of the op."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def src(self) -> _ods_ir.Value:
    """Return operand 1 of the op."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def cpSize(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return operand 2, or None when the op has fewer than three operands."""
    op_operands = self.operation.operands
    if len(op_operands) < 3:
      return None
    return op_operands[2]

  @builtins.property
  def size(self) -> _ods_ir.IntegerAttr:
    """Return the value stored under the `size` attribute."""
    attrs = self.operation.attributes
    return attrs["size"]

  @size.setter
  def size(self, value: _ods_ir.IntegerAttr):
    """Store `value` as the `size` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["size"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def modifier(self) -> _ods_ir.Attribute:
    """Return the value stored under the `modifier` attribute."""
    attrs = self.operation.attributes
    return attrs["modifier"]

  @modifier.setter
  def modifier(self, value: _ods_ir.Attribute):
    """Store `value` as the `modifier` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["modifier"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def cp_async_shared_global(
    dst,
    src,
    size,
    modifier,
    *,
    cp_size=None,
    loc=None,
    ip=None,
) -> CpAsyncOp:
  """Construct and return a `CpAsyncOp`; `cp_size` is forwarded as `cpSize`."""
  op = CpAsyncOp(
      dst=dst,
      src=src,
      size=size,
      modifier=modifier,
      cpSize=cp_size,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class CpAsyncWaitGroupOp(_ods_ir.OpView):
  r"""Python view of the `nvvm.cp.async.wait.group` operation.

  The op has no operands or results; its only attribute is `n`.
  """

  OPERATION_NAME = "nvvm.cp.async.wait.group"

  _ODS_REGIONS = (0, True)

  def __init__(self, n, *, loc=None, ip=None):
    """Build the op with its `n` attribute.

    An `n` that is not already an `Attribute` is converted through the
    registered `I32Attr` builder when one exists. `loc` and `ip` are
    forwarded unchanged to `OpView.__init__`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if not isinstance(n, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('I32Attr'):
      attributes["n"] = _ods_ir.AttrBuilder.get('I32Attr')(n, context=_ods_context)
    else:
      attributes["n"] = n
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def n(self) -> _ods_ir.IntegerAttr:
    """Return the value stored under the `n` attribute."""
    attrs = self.operation.attributes
    return attrs["n"]

  @n.setter
  def n(self, value: _ods_ir.IntegerAttr):
    """Store `value` as the `n` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["n"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def cp_async_wait_group(
    n,
    *,
    loc=None,
    ip=None,
) -> CpAsyncWaitGroupOp:
  """Construct and return a `CpAsyncWaitGroupOp`; all arguments are forwarded by keyword."""
  op = CpAsyncWaitGroupOp(n=n, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class DotAccumulate2WayOp(_ods_ir.OpView):
  r"""
  Performs a two-way 16-bit to 8-bit dot-product which is accumulated in a
  32-bit result.
  Operand `a` is a vector of two 16-bit elements and operand `b` a vector
  of four 8-bit elements between which the dot product is computed.

  The `a_type` and `b_type` attributes specify the type of the elements in `a`
  and `b` respectively.
  If `a_type` or `b_type` is `s`, then the elements in the corresponding
  vector are sign-extended to 32-bit before the dot product is computed.
  If `a_type` or `b_type` is `u`, then the elements in the corresponding
  vector are zero-extended to 32-bit instead.

  The `b_hi` boolean attribute specifies which two bytes of `b` are used for
  the dot product. If `b_hi` is true, then the dot product is computed
  between `a` and elements at indices 2 and 3 of `b`. If `b_hi` is false,
  then the dot product is computed between `a` and elements at indices 0 and
  1 of `b`.

  Operand `c` is a 32-bit integer to which the result is accumulated. It is
  treated as holding a signed integer if any of `a_type` or `b_type` is
  signed.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#integer-arithmetic-instructions-dp2a)
  """

  OPERATION_NAME = "nvvm.dot.accumulate.2way"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, a, a_type, b, b_type, c, b_hi, *, loc=None, ip=None):
    """Build the op from operands `a`, `b`, `c` and attributes `a_type`, `b_type`, `b_hi`.

    `res` becomes the only entry of the results list. Each attribute value
    that is not already an `Attribute` is converted through the registered
    `AttrBuilder` for its kind when one exists. `loc` and `ip` are forwarded
    unchanged to `OpView.__init__`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    # (attribute name, caller-supplied value, AttrBuilder kind), in declaration order.
    attr_specs = (
        ("a_type", a_type, 'DotAccumulateTypeAttr'),
        ("b_type", b_type, 'DotAccumulateTypeAttr'),
        ("b_hi", b_hi, 'BoolAttr'),
    )
    for attr_name, attr_value, builder_kind in attr_specs:
      if isinstance(attr_value, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains(builder_kind):
        attributes[attr_name] = attr_value
      else:
        attributes[attr_name] = _ods_ir.AttrBuilder.get(builder_kind)(attr_value, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[a, b, c],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def a(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """Return operand 0 of the op."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def b(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """Return operand 1 of the op."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def c(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return operand 2 of the op."""
    op_operands = self.operation.operands
    return op_operands[2]

  @builtins.property
  def a_type(self) -> _ods_ir.Attribute:
    """Return the value stored under the `a_type` attribute."""
    attrs = self.operation.attributes
    return attrs["a_type"]

  @a_type.setter
  def a_type(self, value: _ods_ir.Attribute):
    """Store `value` as the `a_type` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["a_type"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def b_type(self) -> _ods_ir.Attribute:
    """Return the value stored under the `b_type` attribute."""
    attrs = self.operation.attributes
    return attrs["b_type"]

  @b_type.setter
  def b_type(self, value: _ods_ir.Attribute):
    """Store `value` as the `b_type` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["b_type"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def b_hi(self) -> _ods_ir.BoolAttr:
    """Return the value stored under the `b_hi` attribute."""
    attrs = self.operation.attributes
    return attrs["b_hi"]

  @b_hi.setter
  def b_hi(self, value: _ods_ir.BoolAttr):
    """Store `value` as the `b_hi` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["b_hi"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """Return result 0 of the op."""
    op_results = self.operation.results
    return op_results[0]

def dot_accumulate_2way(
    res,
    a,
    a_type,
    b,
    b_type,
    c,
    b_hi,
    *,
    loc=None,
    ip=None,
) -> _ods_ir.OpResult:
  """Construct a `DotAccumulate2WayOp` and return its `.result`."""
  op = DotAccumulate2WayOp(
      res=res,
      a=a,
      a_type=a_type,
      b=b,
      b_type=b_type,
      c=c,
      b_hi=b_hi,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class DotAccumulate4WayOp(_ods_ir.OpView):
  r"""
  Performs a four-way byte dot-product which is accumulated in a 32-bit
  result.
  Operand `a` and `b` are vectors of 4 bytes between which the dot product is
  computed.

  The `a_type` and `b_type` attributes specify the type of the elements in `a`
  and `b` respectively.
  If `a_type` or `b_type` is `signed`, then the elements in the corresponding
  vector are sign-extended to 32-bit before the dot product is computed.
  If `a_type` or `b_type` is `unsigned`, then the elements in the
  corresponding vector are zero-extended to 32-bit instead.

  Operand `c` is a 32-bit integer to which the result is accumulated. It is
  treated as holding a signed integer if any of `a_type` or `b_type` is
  `signed`.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#integer-arithmetic-instructions-dp4a)
  """

  OPERATION_NAME = "nvvm.dot.accumulate.4way"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, a, a_type, b, b_type, c, *, loc=None, ip=None):
    """Build the op from operands `a`, `b`, `c` and attributes `a_type`, `b_type`.

    `res` becomes the only entry of the results list. Each attribute value
    that is not already an `Attribute` is converted through the registered
    `DotAccumulateTypeAttr` builder when one exists. `loc` and `ip` are
    forwarded unchanged to `OpView.__init__`.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    # (attribute name, caller-supplied value, AttrBuilder kind), in declaration order.
    attr_specs = (
        ("a_type", a_type, 'DotAccumulateTypeAttr'),
        ("b_type", b_type, 'DotAccumulateTypeAttr'),
    )
    for attr_name, attr_value, builder_kind in attr_specs:
      if isinstance(attr_value, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains(builder_kind):
        attributes[attr_name] = attr_value
      else:
        attributes[attr_name] = _ods_ir.AttrBuilder.get(builder_kind)(attr_value, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[a, b, c],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def a(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """Return operand 0 of the op."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def b(self) -> _ods_ir.Value[_ods_ir.VectorType]:
    """Return operand 1 of the op."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def c(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return operand 2 of the op."""
    op_operands = self.operation.operands
    return op_operands[2]

  @builtins.property
  def a_type(self) -> _ods_ir.Attribute:
    """Return the value stored under the `a_type` attribute."""
    attrs = self.operation.attributes
    return attrs["a_type"]

  @a_type.setter
  def a_type(self, value: _ods_ir.Attribute):
    """Store `value` as the `a_type` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["a_type"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def b_type(self) -> _ods_ir.Attribute:
    """Return the value stored under the `b_type` attribute."""
    attrs = self.operation.attributes
    return attrs["b_type"]

  @b_type.setter
  def b_type(self, value: _ods_ir.Attribute):
    """Store `value` as the `b_type` attribute; None raises ValueError."""
    if value is not None:
      self.operation.attributes["b_type"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """Return result 0 of the op."""
    op_results = self.operation.results
    return op_results[0]

def dot_accumulate_4way(
    res,
    a,
    a_type,
    b,
    b_type,
    c,
    *,
    loc=None,
    ip=None,
) -> _ods_ir.OpResult:
  """Construct a `DotAccumulate4WayOp` and return its `.result`."""
  op = DotAccumulate4WayOp(
      res=res,
      a=a,
      a_type=a_type,
      b=b,
      b_type=b_type,
      c=c,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class ElectSyncOp(_ods_ir.OpView):
  r"""
  The `elect.sync` instruction elects one predicated active leader
  thread from among a set of threads specified in the `membermask`.
  When the `membermask` is not provided explicitly, a default value
  of `0xFFFFFFFF` is used. The predicate result is set to `True` for
  the leader thread, and `False` for all other threads.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-elect-sync)
  """

  OPERATION_NAME = "nvvm.elect.sync"

  _ODS_REGIONS = (0, True)

  def __init__(self, pred, *, membermask=None, loc=None, ip=None):
    """Build the op; `membermask` is added as the sole operand only when not None.

    `pred` becomes the only entry of the results list. `loc` and `ip` are
    forwarded unchanged to `OpView.__init__`.
    """
    operands = [] if membermask is None else [membermask]
    # The returned context is unused; the lookup is retained so that
    # construction performs the same calls as the generated code.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[pred],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def membermask(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return operand 0, or None when the op has no operands."""
    op_operands = self.operation.operands
    if len(op_operands) < 1:
      return None
    return op_operands[0]

  @builtins.property
  def pred(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """Return result 0 of the op."""
    op_results = self.operation.results
    return op_results[0]

def elect_sync(
    pred,
    *,
    membermask=None,
    loc=None,
    ip=None,
) -> _ods_ir.OpResult:
  """Construct an `ElectSyncOp` and return its `.result`."""
  op = ElectSyncOp(pred=pred, membermask=membermask, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg0Op(_ods_ir.OpView):
  r"""Python view of the `nvvm.read.ptx.sreg.envreg0` operation.

  The builder passes no operands or attributes and a single result.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg0"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Build the op; `res` becomes the only entry of the results list.

    `loc` and `ip` are forwarded unchanged to `OpView.__init__`.
    """
    # The returned context is unused; the lookup is retained so that
    # construction performs the same calls as the generated code.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return result 0 of the op."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg0(
    res,
    *,
    loc=None,
    ip=None,
) -> _ods_ir.OpResult:
  """Construct an `EnvReg0Op` and return its `.result`."""
  op = EnvReg0Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg1Op(_ods_ir.OpView):
  r"""Python view of the `nvvm.read.ptx.sreg.envreg1` operation.

  The builder passes no operands or attributes and a single result.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg1"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Build the op; `res` becomes the only entry of the results list.

    `loc` and `ip` are forwarded unchanged to `OpView.__init__`.
    """
    # The returned context is unused; the lookup is retained so that
    # construction performs the same calls as the generated code.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return result 0 of the op."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg1(
    res,
    *,
    loc=None,
    ip=None,
) -> _ods_ir.OpResult:
  """Construct an `EnvReg1Op` and return its `.result`."""
  op = EnvReg1Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg2Op(_ods_ir.OpView):
  r"""Python view of the `nvvm.read.ptx.sreg.envreg2` operation.

  The builder passes no operands or attributes and a single result.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg2"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Build the op; `res` becomes the only entry of the results list.

    `loc` and `ip` are forwarded unchanged to `OpView.__init__`.
    """
    # The returned context is unused; the lookup is retained so that
    # construction performs the same calls as the generated code.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return result 0 of the op."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg2(
    res,
    *,
    loc=None,
    ip=None,
) -> _ods_ir.OpResult:
  """Construct an `EnvReg2Op` and return its `.result`."""
  op = EnvReg2Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg3Op(_ods_ir.OpView):
  r"""Python view of the `nvvm.read.ptx.sreg.envreg3` operation.

  The builder passes no operands or attributes and a single result.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg3"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Build the op; `res` becomes the only entry of the results list.

    `loc` and `ip` are forwarded unchanged to `OpView.__init__`.
    """
    # The returned context is unused; the lookup is retained so that
    # construction performs the same calls as the generated code.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return result 0 of the op."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg3(
    res,
    *,
    loc=None,
    ip=None,
) -> _ods_ir.OpResult:
  """Construct an `EnvReg3Op` and return its `.result`."""
  op = EnvReg3Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg4Op(_ods_ir.OpView):
  r"""Python view of the `nvvm.read.ptx.sreg.envreg4` operation.

  The builder passes no operands or attributes and a single result.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg4"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Build the op; `res` becomes the only entry of the results list.

    `loc` and `ip` are forwarded unchanged to `OpView.__init__`.
    """
    # The returned context is unused; the lookup is retained so that
    # construction performs the same calls as the generated code.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return result 0 of the op."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg4(
    res,
    *,
    loc=None,
    ip=None,
) -> _ods_ir.OpResult:
  """Construct an `EnvReg4Op` and return its `.result`."""
  op = EnvReg4Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg5Op(_ods_ir.OpView):
  r"""Python view of the `nvvm.read.ptx.sreg.envreg5` operation.

  The builder passes no operands or attributes and a single result.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg5"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Build the op; `res` becomes the only entry of the results list.

    `loc` and `ip` are forwarded unchanged to `OpView.__init__`.
    """
    # The returned context is unused; the lookup is retained so that
    # construction performs the same calls as the generated code.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return result 0 of the op."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg5(
    res,
    *,
    loc=None,
    ip=None,
) -> _ods_ir.OpResult:
  """Construct an `EnvReg5Op` and return its `.result`."""
  op = EnvReg5Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg6Op(_ods_ir.OpView):
  r"""Python view of the `nvvm.read.ptx.sreg.envreg6` operation.

  The builder passes no operands or attributes and a single result.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg6"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Build the op; `res` becomes the only entry of the results list.

    `loc` and `ip` are forwarded unchanged to `OpView.__init__`.
    """
    # The returned context is unused; the lookup is retained so that
    # construction performs the same calls as the generated code.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return result 0 of the op."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg6(
    res,
    *,
    loc=None,
    ip=None,
) -> _ods_ir.OpResult:
  """Construct an `EnvReg6Op` and return its `.result`."""
  op = EnvReg6Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg7Op(_ods_ir.OpView):
  r"""Python view of the `nvvm.read.ptx.sreg.envreg7` operation.

  The builder passes no operands or attributes and a single result.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg7"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Build the op; `res` becomes the only entry of the results list.

    `loc` and `ip` are forwarded unchanged to `OpView.__init__`.
    """
    # The returned context is unused; the lookup is retained so that
    # construction performs the same calls as the generated code.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return result 0 of the op."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg7(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg7Op` from `res` and return its `result`."""
  op = EnvReg7Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg8Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg8` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg8"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg8(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg8Op` from `res` and return its `result`."""
  op = EnvReg8Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg9Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg9` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg9"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg9(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg9Op` from `res` and return its `result`."""
  op = EnvReg9Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg10Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg10` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg10"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg10(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg10Op` from `res` and return its `result`."""
  op = EnvReg10Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg11Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg11` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg11"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg11(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg11Op` from `res` and return its `result`."""
  op = EnvReg11Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg12Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg12` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg12"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg12(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg12Op` from `res` and return its `result`."""
  op = EnvReg12Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg13Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg13` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg13"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg13(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg13Op` from `res` and return its `result`."""
  op = EnvReg13Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg14Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg14` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg14"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg14(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg14Op` from `res` and return its `result`."""
  op = EnvReg14Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg15Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg15` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg15"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg15(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg15Op` from `res` and return its `result`."""
  op = EnvReg15Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg16Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg16` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg16"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg16(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg16Op` from `res` and return its `result`."""
  op = EnvReg16Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg17Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg17` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg17"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg17(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg17Op` from `res` and return its `result`."""
  op = EnvReg17Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg18Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg18` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg18"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg18(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg18Op` from `res` and return its `result`."""
  op = EnvReg18Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg19Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg19` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg19"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg19(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg19Op` from `res` and return its `result`."""
  op = EnvReg19Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg20Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg20` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg20"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg20(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg20Op` from `res` and return its `result`."""
  op = EnvReg20Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg21Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg21` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg21"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg21(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg21Op` from `res` and return its `result`."""
  op = EnvReg21Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg22Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg22` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg22"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg22(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg22Op` from `res` and return its `result`."""
  op = EnvReg22Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg23Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg23` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg23"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg23(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg23Op` from `res` and return its `result`."""
  op = EnvReg23Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg24Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg24` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg24"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg24(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg24Op` from `res` and return its `result`."""
  op = EnvReg24Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg25Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg25` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg25"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg25(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg25Op` from `res` and return its `result`."""
  op = EnvReg25Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg26Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg26` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg26"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg26(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg26Op` from `res` and return its `result`."""
  op = EnvReg26Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg27Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg27` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg27"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg27(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg27Op` from `res` and return its `result`."""
  op = EnvReg27Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg28Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg28` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg28"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg28(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg28Op` from `res` and return its `result`."""
  op = EnvReg28Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg29Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg29` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg29"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg29(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg29Op` from `res` and return its `result`."""
  op = EnvReg29Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg30Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg30` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg30"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg30(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg30Op` from `res` and return its `result`."""
  op = EnvReg30Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class EnvReg31Op(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.envreg31` operation.

  Built with no operands and no attributes; its single result is exposed
  through the `res` property.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.envreg31"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    """Create the op; `res` is the only entry of the `results` list."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return element 0 of the underlying operation's results."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_envreg31(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `EnvReg31Op` from `res` and return its `result`."""
  op = EnvReg31Op(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class Exit(_ods_ir.OpView):
  r"""
  Ends execution of a thread.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-exit)
  """

  OPERATION_NAME = "nvvm.exit"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, loc=None, ip=None):
    """Create the op; it is built with no operands, attributes, or results."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

def exit(*, loc=None, ip=None) -> Exit:
  """Construct an `Exit` op and return it."""
  op = Exit(loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class FenceMbarrierInitOp(_ods_ir.OpView):
  r"""
  Fence operation that applies to the prior `nvvm.mbarrier.init`.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
  """

  OPERATION_NAME = "nvvm.fence.mbarrier.init"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, loc=None, ip=None):
    """Create the op; it is built with no operands, attributes, or results."""
    # Resolve the default context for `loc`; this op does not use the value.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

def fence_mbarrier_init(*, loc=None, ip=None) -> FenceMbarrierInitOp:
  """Construct a `FenceMbarrierInitOp` and return it."""
  op = FenceMbarrierInitOp(loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class FenceProxyAcquireOp(_ods_ir.OpView):
  r"""
  `fence.proxy.acquire` is a uni-directional fence used to establish ordering
  between a prior memory access performed via the generic proxy and a
  subsequent memory access performed via the tensormap proxy.

  The address operand `addr` and the operand `size` together specify the
  memory range `[addr, addr+size)` on which the ordering guarantees on the
  memory accesses across the proxies are to be provided. The only supported
  value for the `size` operand is 128, and it must be an immediate. Generic
  addressing is used unconditionally, and the address specified by the operand
  `addr` must fall within the `.global` state space; otherwise, the behavior
  is undefined.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
  """

  OPERATION_NAME = "nvvm.fence.proxy.acquire"

  _ODS_REGIONS = (0, True)

  def __init__(self, scope, addr, size, *, fromProxy=None, toProxy=None, loc=None, ip=None):
    """Create the op from its `scope` attribute and `addr`/`size` operands.

    `fromProxy` and `toProxy` are recorded as attributes only when they are
    not None. Values that are not already `Attribute` instances are converted
    through the matching `AttrBuilder` entry when one is registered.
    """
    ctx = _ods_get_default_loc_context(loc)

    def to_attr(value, builder_name):
      # Ready-made attributes, and values with no registered builder, pass
      # through untouched; everything else is built in the resolved context.
      if isinstance(value, _ods_ir.Attribute):
        return value
      if not _ods_ir.AttrBuilder.contains(builder_name):
        return value
      return _ods_ir.AttrBuilder.get(builder_name)(value, context=ctx)

    attrs = {"scope": to_attr(scope, 'MemScopeKindAttr')}
    if fromProxy is not None:
      attrs["fromProxy"] = to_attr(fromProxy, 'ProxyKindAttr')
    if toProxy is not None:
      attrs["toProxy"] = to_attr(toProxy, 'ProxyKindAttr')
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[],
        operands=[addr, size],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Operand 0 of the underlying operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def size(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 1 of the underlying operation."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def scope(self) -> _ods_ir.Attribute:
    """The `scope` attribute of the underlying operation."""
    op_attrs = self.operation.attributes
    return op_attrs["scope"]

  @scope.setter
  def scope(self, value: _ods_ir.Attribute):
    """Replace the `scope` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["scope"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def fromProxy(self) -> _ods_ir.Attribute:
    """The `fromProxy` attribute of the underlying operation.

    NOTE(review): the constructor stores this attribute only when a non-None
    `fromProxy` was given; what the lookup does when it is absent depends on
    the attribute map - confirm.
    """
    op_attrs = self.operation.attributes
    return op_attrs["fromProxy"]

  @fromProxy.setter
  def fromProxy(self, value: _ods_ir.Attribute):
    """Replace the `fromProxy` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["fromProxy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def toProxy(self) -> _ods_ir.Attribute:
    """The `toProxy` attribute of the underlying operation.

    NOTE(review): the constructor stores this attribute only when a non-None
    `toProxy` was given; what the lookup does when it is absent depends on
    the attribute map - confirm.
    """
    op_attrs = self.operation.attributes
    return op_attrs["toProxy"]

  @toProxy.setter
  def toProxy(self, value: _ods_ir.Attribute):
    """Replace the `toProxy` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["toProxy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def fence_proxy_acquire(scope, addr, size, *, from_proxy=None, to_proxy=None, loc=None, ip=None) -> FenceProxyAcquireOp:
  """Construct a `FenceProxyAcquireOp` and return it.

  `from_proxy` and `to_proxy` are forwarded as the constructor's `fromProxy`
  and `toProxy` keyword arguments.
  """
  op = FenceProxyAcquireOp(
      scope=scope,
      addr=addr,
      size=size,
      fromProxy=from_proxy,
      toProxy=to_proxy,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class FenceProxyOp(_ods_ir.OpView):
  r"""
  Fence operation with proxy to establish an ordering between memory accesses
  that may happen through different proxies.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
  """

  OPERATION_NAME = "nvvm.fence.proxy"

  _ODS_REGIONS = (0, True)

  def __init__(self, kind, *, space=None, loc=None, ip=None):
    """Create the op from its `kind` attribute and optional `space` attribute.

    `space` is recorded only when it is not None. Values that are not already
    `Attribute` instances are converted through the matching `AttrBuilder`
    entry when one is registered.
    """
    ctx = _ods_get_default_loc_context(loc)

    def to_attr(value, builder_name):
      # Ready-made attributes, and values with no registered builder, pass
      # through untouched; everything else is built in the resolved context.
      if isinstance(value, _ods_ir.Attribute):
        return value
      if not _ods_ir.AttrBuilder.contains(builder_name):
        return value
      return _ods_ir.AttrBuilder.get(builder_name)(value, context=ctx)

    attrs = {"kind": to_attr(kind, 'ProxyKindAttr')}
    if space is not None:
      attrs["space"] = to_attr(space, 'SharedSpaceAttr')
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """The `kind` attribute of the underlying operation."""
    op_attrs = self.operation.attributes
    return op_attrs["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Replace the `kind` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["kind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def space(self) -> _Optional[_ods_ir.Attribute]:
    """The `space` attribute, or None when the operation does not carry it."""
    op_attrs = self.operation.attributes
    return op_attrs["space"] if "space" in op_attrs else None

  @space.setter
  def space(self, value: _Optional[_ods_ir.Attribute]):
    """Set the `space` attribute; None removes it when it is present."""
    op_attrs = self.operation.attributes
    if value is None:
      if "space" in op_attrs:
        del op_attrs["space"]
      return
    op_attrs["space"] = value

  @space.deleter
  def space(self):
    """Delete the `space` attribute from the underlying operation."""
    op_attrs = self.operation.attributes
    del op_attrs["space"]

def fence_proxy(kind, *, space=None, loc=None, ip=None) -> FenceProxyOp:
  """Construct a `FenceProxyOp` from `kind` and optional `space`, and return it."""
  op = FenceProxyOp(kind=kind, space=space, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class FenceProxyReleaseOp(_ods_ir.OpView):
  r"""
  `fence.proxy.release` is a uni-directional fence used to establish ordering
  between a prior memory access performed via the generic proxy and a
  subsequent memory access performed via the tensormap proxy. The
  `fence.proxy.release` operation can form a release sequence that
  synchronizes with an acquire sequence that contains the
  `fence.proxy.acquire` proxy fence operation.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
  """

  OPERATION_NAME = "nvvm.fence.proxy.release"

  _ODS_REGIONS = (0, True)

  def __init__(self, scope, *, fromProxy=None, toProxy=None, loc=None, ip=None):
    """Create the op from its `scope` attribute and optional proxy attributes.

    `fromProxy` and `toProxy` are recorded as attributes only when they are
    not None. Values that are not already `Attribute` instances are converted
    through the matching `AttrBuilder` entry when one is registered.
    """
    ctx = _ods_get_default_loc_context(loc)

    def to_attr(value, builder_name):
      # Ready-made attributes, and values with no registered builder, pass
      # through untouched; everything else is built in the resolved context.
      if isinstance(value, _ods_ir.Attribute):
        return value
      if not _ods_ir.AttrBuilder.contains(builder_name):
        return value
      return _ods_ir.AttrBuilder.get(builder_name)(value, context=ctx)

    attrs = {"scope": to_attr(scope, 'MemScopeKindAttr')}
    if fromProxy is not None:
      attrs["fromProxy"] = to_attr(fromProxy, 'ProxyKindAttr')
    if toProxy is not None:
      attrs["toProxy"] = to_attr(toProxy, 'ProxyKindAttr')
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def scope(self) -> _ods_ir.Attribute:
    """The `scope` attribute of the underlying operation."""
    op_attrs = self.operation.attributes
    return op_attrs["scope"]

  @scope.setter
  def scope(self, value: _ods_ir.Attribute):
    """Replace the `scope` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["scope"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def fromProxy(self) -> _ods_ir.Attribute:
    """The `fromProxy` attribute of the underlying operation.

    NOTE(review): the constructor stores this attribute only when a non-None
    `fromProxy` was given; what the lookup does when it is absent depends on
    the attribute map - confirm.
    """
    op_attrs = self.operation.attributes
    return op_attrs["fromProxy"]

  @fromProxy.setter
  def fromProxy(self, value: _ods_ir.Attribute):
    """Replace the `fromProxy` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["fromProxy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def toProxy(self) -> _ods_ir.Attribute:
    """The `toProxy` attribute of the underlying operation.

    NOTE(review): the constructor stores this attribute only when a non-None
    `toProxy` was given; what the lookup does when it is absent depends on
    the attribute map - confirm.
    """
    op_attrs = self.operation.attributes
    return op_attrs["toProxy"]

  @toProxy.setter
  def toProxy(self, value: _ods_ir.Attribute):
    """Replace the `toProxy` attribute; None is rejected with ValueError."""
    if value is not None:
      self.operation.attributes["toProxy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def fence_proxy_release(scope, *, from_proxy=None, to_proxy=None, loc=None, ip=None) -> FenceProxyReleaseOp:
  """Construct a `FenceProxyReleaseOp` and return it.

  `from_proxy` and `to_proxy` are forwarded as the constructor's `fromProxy`
  and `toProxy` keyword arguments.
  """
  op = FenceProxyReleaseOp(
      scope=scope,
      fromProxy=from_proxy,
      toProxy=to_proxy,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class FenceProxySyncRestrictOp(_ods_ir.OpView):
  r"""
  The `nvvm.fence.proxy.sync_restrict` Op is used to establish
  ordering between a prior memory access performed between proxies. Currently,
  the ordering is only supported between async and generic proxies. `sync_restrict`
  restricts `acquire` memory semantics to `shared_cluster` and `release` memory
  semantics to `shared_cta` with cluster scope.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)

  Builder notes: `order` is always written to the attribute dictionary, while
  `fromProxy` and `toProxy` are written only when they are not `None`. A value
  that is not already an `Attribute` is converted through the registered
  attribute builder when one exists and is passed through unchanged otherwise.
  """

  OPERATION_NAME = "nvvm.fence.proxy.sync_restrict"

  _ODS_REGIONS = (0, True)

  def __init__(self, order, *, fromProxy=None, toProxy=None, loc=None, ip=None):
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}

    if not isinstance(order, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('MemOrderKindAttr'):
      order = _ods_ir.AttrBuilder.get('MemOrderKindAttr')(order, context=ctx)
    attrs["order"] = order

    if fromProxy is not None:
      if not isinstance(fromProxy, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('ProxyKindAttr'):
        fromProxy = _ods_ir.AttrBuilder.get('ProxyKindAttr')(fromProxy, context=ctx)
      attrs["fromProxy"] = fromProxy

    if toProxy is not None:
      if not isinstance(toProxy, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('ProxyKindAttr'):
        toProxy = _ods_ir.AttrBuilder.get('ProxyKindAttr')(toProxy, context=ctx)
      attrs["toProxy"] = toProxy

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def order(self) -> _ods_ir.Attribute:
    """Return the entry stored under "order" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["order"]

  @order.setter
  def order(self, value: _ods_ir.Attribute):
    """Store `value` under "order"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["order"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def fromProxy(self) -> _ods_ir.Attribute:
    """Return the entry stored under "fromProxy" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["fromProxy"]

  @fromProxy.setter
  def fromProxy(self, value: _ods_ir.Attribute):
    """Store `value` under "fromProxy"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["fromProxy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def toProxy(self) -> _ods_ir.Attribute:
    """Return the entry stored under "toProxy" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["toProxy"]

  @toProxy.setter
  def toProxy(self, value: _ods_ir.Attribute):
    """Store `value` under "toProxy"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["toProxy"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def fence_proxy_sync_restrict(order, *, from_proxy=None, to_proxy=None, loc=None, ip=None) -> FenceProxySyncRestrictOp:
  """Construct a `FenceProxySyncRestrictOp` and return the op itself.

  `from_proxy` and `to_proxy` are forwarded to the constructor's `fromProxy`
  and `toProxy` keyword arguments; everything else is passed through as-is.
  """
  op = FenceProxySyncRestrictOp(
      order=order,
      fromProxy=from_proxy,
      toProxy=to_proxy,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class FenceScClusterOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.fence.sc.cluster` operation.

  The builder passes no operands, no attributes and no results to the base
  `OpView` constructor.
  """

  OPERATION_NAME = "nvvm.fence.sc.cluster"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, loc=None, ip=None):
    # The returned context is not used here; the call is kept so the
    # constructor performs exactly the same steps as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

def fence_sc_cluster(*, loc=None, ip=None) -> FenceScClusterOp:
  """Construct a `FenceScClusterOp` with the given `loc`/`ip` and return it."""
  op = FenceScClusterOp(loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class FenceSyncRestrictOp(_ods_ir.OpView):
  r"""
  The `nvvm.fence.sync_restrict` Op restricts the class of memory
  operations for which the fence instruction provides the memory ordering guarantees.
  `sync_restrict` restricts `acquire` memory semantics to `shared_cluster` and
  `release` memory semantics to `shared_cta` with cluster scope.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)

  Builder notes: `order` is always written to the attribute dictionary. A
  value that is not already an `Attribute` is converted through the
  registered 'MemOrderKindAttr' builder when one exists and is passed through
  unchanged otherwise.
  """

  OPERATION_NAME = "nvvm.fence.sync_restrict"

  _ODS_REGIONS = (0, True)

  def __init__(self, order, *, loc=None, ip=None):
    ctx = _ods_get_default_loc_context(loc)
    if not isinstance(order, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('MemOrderKindAttr'):
      order = _ods_ir.AttrBuilder.get('MemOrderKindAttr')(order, context=ctx)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={"order": order},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def order(self) -> _ods_ir.Attribute:
    """Return the entry stored under "order" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["order"]

  @order.setter
  def order(self, value: _ods_ir.Attribute):
    """Store `value` under "order"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["order"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def fence_sync_restrict(order, *, loc=None, ip=None) -> FenceSyncRestrictOp:
  """Construct a `FenceSyncRestrictOp` from `order` and return the op itself."""
  op = FenceSyncRestrictOp(order=order, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class GlobalTimerLoOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.globaltimer.lo` operation.

  The builder takes no operands or attributes. `res` becomes the only entry
  of the result list handed to the base constructor (presumably the result
  type; confirm against the dialect definition).
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.globaltimer.lo"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    # The returned context is not used here; the call is kept so the
    # constructor performs exactly the same steps as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_globaltimer_lo(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `GlobalTimerLoOp` and return its `.result`."""
  op = GlobalTimerLoOp(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class GlobalTimerOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.globaltimer` operation.

  The builder takes no operands or attributes. `res` becomes the only entry
  of the result list handed to the base constructor (presumably the result
  type; confirm against the dialect definition).
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.globaltimer"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    # The returned context is not used here; the call is kept so the
    # constructor performs exactly the same steps as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_globaltimer(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `GlobalTimerOp` and return its `.result`."""
  op = GlobalTimerOp(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class GridDimXOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.nctaid.x` operation.

  `res` becomes the only entry of the result list handed to the base
  constructor (presumably the result type; confirm against the dialect
  definition). The optional `range` attribute is written only when it is not
  `None`; a value that is not already an `Attribute` is converted through the
  registered 'LLVM_ConstantRangeAttr' builder when one exists.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.nctaid.x"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if range is not None:
      if not isinstance(range, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=ctx)
      attrs["range"] = range
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """Return the "range" attribute, or `None` when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set "range" to `value`; `None` removes the attribute if it is present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
    else:
      attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the "range" attribute without checking that it is present."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_nctaid_x(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `GridDimXOp` and return its `.result`."""
  op = GridDimXOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class GridDimYOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.nctaid.y` operation.

  `res` becomes the only entry of the result list handed to the base
  constructor (presumably the result type; confirm against the dialect
  definition). The optional `range` attribute is written only when it is not
  `None`; a value that is not already an `Attribute` is converted through the
  registered 'LLVM_ConstantRangeAttr' builder when one exists.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.nctaid.y"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if range is not None:
      if not isinstance(range, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=ctx)
      attrs["range"] = range
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """Return the "range" attribute, or `None` when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set "range" to `value`; `None` removes the attribute if it is present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
    else:
      attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the "range" attribute without checking that it is present."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_nctaid_y(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `GridDimYOp` and return its `.result`."""
  op = GridDimYOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class GridDimZOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.nctaid.z` operation.

  `res` becomes the only entry of the result list handed to the base
  constructor (presumably the result type; confirm against the dialect
  definition). The optional `range` attribute is written only when it is not
  `None`; a value that is not already an `Attribute` is converted through the
  registered 'LLVM_ConstantRangeAttr' builder when one exists.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.nctaid.z"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if range is not None:
      if not isinstance(range, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=ctx)
      attrs["range"] = range
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """Return the "range" attribute, or `None` when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set "range" to `value`; `None` removes the attribute if it is present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
    else:
      attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the "range" attribute without checking that it is present."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_nctaid_z(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `GridDimZOp` and return its `.result`."""
  op = GridDimZOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class GridIdOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.gridid` operation.

  `res` becomes the only entry of the result list handed to the base
  constructor (presumably the result type; confirm against the dialect
  definition). The optional `range` attribute is written only when it is not
  `None`; a value that is not already an `Attribute` is converted through the
  registered 'LLVM_ConstantRangeAttr' builder when one exists.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.gridid"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if range is not None:
      if not isinstance(range, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=ctx)
      attrs["range"] = range
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """Return the "range" attribute, or `None` when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set "range" to `value`; `None` removes the attribute if it is present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
    else:
      attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the "range" attribute without checking that it is present."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_gridid(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `GridIdOp` and return its `.result`."""
  op = GridIdOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class GriddepcontrolOp(_ods_ir.OpView):
  r"""
  If the $kind attribute is set to `wait`, it causes the
  executing thread to wait until all prerequisite grids in flight
  have completed and all the memory operations from the prerequisite grids
  are performed and made visible to the current grid.

  When the $kind is launch_dependents, it signals that specific dependents
  the runtime system designated to react to this instruction can be scheduled
  as soon as all other CTAs in the grid issue the same instruction or have
  completed.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol)

  Builder notes: `kind` is always written to the attribute dictionary. A
  value that is not already an `Attribute` is converted through the
  registered 'GridDepActionAttr' builder when one exists and is passed
  through unchanged otherwise.
  """

  OPERATION_NAME = "nvvm.griddepcontrol"

  _ODS_REGIONS = (0, True)

  def __init__(self, kind, *, loc=None, ip=None):
    ctx = _ods_get_default_loc_context(loc)
    if not isinstance(kind, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('GridDepActionAttr'):
      kind = _ods_ir.AttrBuilder.get('GridDepActionAttr')(kind, context=ctx)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={"kind": kind},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Return the entry stored under "kind" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Store `value` under "kind"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["kind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def griddepcontrol(kind, *, loc=None, ip=None) -> GriddepcontrolOp:
  """Construct a `GriddepcontrolOp` from `kind` and return the op itself."""
  op = GriddepcontrolOp(kind=kind, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class InlinePtxOp(_ods_ir.OpView):
  r"""
  This op allows using PTX directly within the NVVM
  dialect, while greatly simplifying llvm.inline_asm generation. It
  automatically handles register size selection and sets the correct
  read/write access for each operand. The operation leverages the
  `BasicPtxBuilderInterface` to abstract away low-level details of
  PTX assembly formatting.

  The `predicate` attribute is used to specify a predicate for the
  PTX instruction.

  Example 1: Read-only Parameters
  ```mlir
  nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count) : !llvm.ptr, i32

  // Lowers to:
  llvm.inline_asm has_side_effects asm_dialect = att
    "mbarrier.init.b64 [$0], $1;", "l,r" %arg0, %arg2 : (!llvm.ptr, i32) -> ()
  ```

  Example 2: Read-only and Write-only Parameters
  ```mlir
  %0 = nvvm.inline_ptx "ex2.approx.ftz.f32 $0, $1;" (%input) : f32 -> f32

  // Lowers to:
  %0 = llvm.inline_asm has_side_effects asm_dialect = att
    "ex2.approx.ftz.f32 $0, $1;", "=f,f" %arg0 : (f32) -> f32
  ```

  Example 3: Predicate Usage
  ```mlir
  nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count),
    predicate = %pred : !llvm.ptr, i32, i1

  // Lowers to:
  llvm.inline_asm has_side_effects asm_dialect = att
    "@$2 mbarrier.init.b64 [$0], $1;", "l,r,b" %arg0, %arg2, %arg3
    : (!llvm.ptr, i32, i1) -> ()
  ```

  Builder notes: the operand list is built from three groups in this order:
  `readOnlyArgs`, `readWriteArgs` and `predicate` (which may be `None`). The
  entries of `writeOnlyArgs` form the result list handed to the base
  constructor.
  """

  OPERATION_NAME = "nvvm.inline_ptx"

  _ODS_OPERAND_SEGMENTS = [-1, -1, 0]

  _ODS_REGIONS = (0, True)

  def __init__(self, writeOnlyArgs, readOnlyArgs, readWriteArgs, ptxCode, *, predicate=None, loc=None, ip=None):
    # Operand groups are collected before the context lookup, in the same
    # order as the segment description above.
    operand_groups = [
        _get_op_results_or_values(readOnlyArgs),
        _get_op_results_or_values(readWriteArgs),
        predicate,
    ]
    ctx = _ods_get_default_loc_context(loc)
    if not isinstance(ptxCode, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('StrAttr'):
      ptxCode = _ods_ir.AttrBuilder.get('StrAttr')(ptxCode, context=ctx)
    attrs = {"ptxCode": ptxCode}
    result_list = list(writeOnlyArgs)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=result_list,
        operands=operand_groups,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def readOnlyArgs(self) -> _ods_ir.OpOperandList:
    """Return operand segment 0, sized by the "operandSegmentSizes" attribute."""
    op = self.operation
    return _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 0)

  @builtins.property
  def readWriteArgs(self) -> _ods_ir.OpOperandList:
    """Return operand segment 1, sized by the "operandSegmentSizes" attribute."""
    op = self.operation
    return _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 1)

  @builtins.property
  def predicate(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the first operand of segment 2, or `None` when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 2)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def ptxCode(self) -> _ods_ir.StringAttr:
    """Return the entry stored under "ptxCode" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["ptxCode"]

  @ptxCode.setter
  def ptxCode(self, value: _ods_ir.StringAttr):
    """Store `value` under "ptxCode"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["ptxCode"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def writeOnlyArgs(self) -> _ods_ir.OpResultList:
    """Return a slice covering every result of the underlying operation."""
    all_results = self.operation.results
    group_len = len(all_results)
    return all_results[0:group_len]

def inline_ptx(write_only_args, read_only_args, read_write_args, ptx_code, *, predicate=None, loc=None, ip=None) -> _Union[_ods_ir.OpResult, _ods_ir.OpResultList, InlinePtxOp]:
  """Construct an `InlinePtxOp` and return its results in the most direct form.

  Returns the result list when the op has more than one result, the single
  result when it has exactly one, and the op itself when it has none.
  """
  op = InlinePtxOp(
      writeOnlyArgs=write_only_args,
      readOnlyArgs=read_only_args,
      readWriteArgs=read_write_args,
      ptxCode=ptx_code,
      predicate=predicate,
      loc=loc,
      ip=ip,
  )
  results = op.results
  count = len(results)
  if count > 1:
    return results
  if count == 1:
    return results[0]
  return op

@_ods_cext.register_operation(_Dialect)
class LaneIdOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.laneid` operation.

  `res` becomes the only entry of the result list handed to the base
  constructor (presumably the result type; confirm against the dialect
  definition). The optional `range` attribute is written only when it is not
  `None`; a value that is not already an `Attribute` is converted through the
  registered 'LLVM_ConstantRangeAttr' builder when one exists.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.laneid"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if range is not None:
      if not isinstance(range, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        range = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=ctx)
      attrs["range"] = range
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """Return the "range" attribute, or `None` when the op does not carry one."""
    attrs = self.operation.attributes
    return attrs["range"] if "range" in attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set "range" to `value`; `None` removes the attribute if it is present."""
    attrs = self.operation.attributes
    if value is None:
      if "range" in attrs:
        del attrs["range"]
    else:
      attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the "range" attribute without checking that it is present."""
    attrs = self.operation.attributes
    del attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_laneid(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `LaneIdOp` and return its `.result`."""
  op = LaneIdOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class LaneMaskEqOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.lanemask.eq` operation.

  The builder takes no operands or attributes. `res` becomes the only entry
  of the result list handed to the base constructor (presumably the result
  type; confirm against the dialect definition).
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.lanemask.eq"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    # The returned context is not used here; the call is kept so the
    # constructor performs exactly the same steps as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_lanemask_eq(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `LaneMaskEqOp` and return its `.result`."""
  op = LaneMaskEqOp(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class LaneMaskGeOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.lanemask.ge` operation.

  The builder takes no operands or attributes. `res` becomes the only entry
  of the result list handed to the base constructor (presumably the result
  type; confirm against the dialect definition).
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.lanemask.ge"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    # The returned context is not used here; the call is kept so the
    # constructor performs exactly the same steps as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_lanemask_ge(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `LaneMaskGeOp` and return its `.result`."""
  op = LaneMaskGeOp(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class LaneMaskGtOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.lanemask.gt` operation.

  The builder takes no operands or attributes. `res` becomes the only entry
  of the result list handed to the base constructor (presumably the result
  type; confirm against the dialect definition).
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.lanemask.gt"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    # The returned context is not used here; the call is kept so the
    # constructor performs exactly the same steps as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_lanemask_gt(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `LaneMaskGtOp` and return its `.result`."""
  op = LaneMaskGtOp(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class LaneMaskLeOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.lanemask.le` operation.

  The builder takes no operands or attributes. `res` becomes the only entry
  of the result list handed to the base constructor (presumably the result
  type; confirm against the dialect definition).
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.lanemask.le"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    # The returned context is not used here; the call is kept so the
    # constructor performs exactly the same steps as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_lanemask_le(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `LaneMaskLeOp` and return its `.result`."""
  op = LaneMaskLeOp(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class LaneMaskLtOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.read.ptx.sreg.lanemask.lt` operation.

  The builder takes no operands or attributes. `res` becomes the only entry
  of the result list handed to the base constructor (presumably the result
  type; confirm against the dialect definition).
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.lanemask.lt"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, loc=None, ip=None):
    # The returned context is not used here; the call is kept so the
    # constructor performs exactly the same steps as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def read_ptx_sreg_lanemask_lt(res, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `LaneMaskLtOp` and return its `.result`."""
  op = LaneMaskLtOp(res=res, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class LdMatrixOp(_ods_ir.OpView):
  r"""
  Python view of the `nvvm.ldmatrix` operation.

  The builder passes `ptr` as the single operand and `res` as the single
  entry of the result list (presumably the result type; confirm against the
  dialect definition). The attributes `num`, `layout`, `shape` and `eltType`
  are always written; a value that is not already an `Attribute` is converted
  through the registered attribute builder when one exists and is passed
  through unchanged otherwise.
  """

  OPERATION_NAME = "nvvm.ldmatrix"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, ptr, num, layout, shape, eltType, *, loc=None, ip=None):
    operand_list = [ptr]
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}

    if not isinstance(num, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('I32Attr'):
      num = _ods_ir.AttrBuilder.get('I32Attr')(num, context=ctx)
    attrs["num"] = num

    if not isinstance(layout, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('MMALayoutAttr'):
      layout = _ods_ir.AttrBuilder.get('MMALayoutAttr')(layout, context=ctx)
    attrs["layout"] = layout

    if not isinstance(shape, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('LdStMatrixShapeAttr'):
      shape = _ods_ir.AttrBuilder.get('LdStMatrixShapeAttr')(shape, context=ctx)
    attrs["shape"] = shape

    if not isinstance(eltType, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('LdStMatrixEltTypeAttr'):
      eltType = _ods_ir.AttrBuilder.get('LdStMatrixEltTypeAttr')(eltType, context=ctx)
    attrs["eltType"] = eltType

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=operand_list,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def ptr(self) -> _ods_ir.Value:
    """Return the first operand of the underlying operation."""
    all_operands = self.operation.operands
    return all_operands[0]

  @builtins.property
  def num(self) -> _ods_ir.IntegerAttr:
    """Return the entry stored under "num" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["num"]

  @num.setter
  def num(self, value: _ods_ir.IntegerAttr):
    """Store `value` under "num"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["num"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def layout(self) -> _ods_ir.Attribute:
    """Return the entry stored under "layout" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["layout"]

  @layout.setter
  def layout(self, value: _ods_ir.Attribute):
    """Store `value` under "layout"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["layout"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def shape(self) -> _ods_ir.Attribute:
    """Return the entry stored under "shape" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["shape"]

  @shape.setter
  def shape(self, value: _ods_ir.Attribute):
    """Store `value` under "shape"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["shape"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def eltType(self) -> _ods_ir.Attribute:
    """Return the entry stored under "eltType" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["eltType"]

  @eltType.setter
  def eltType(self, value: _ods_ir.Attribute):
    """Store `value` under "eltType"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["eltType"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the first result of the underlying operation."""
    all_results = self.operation.results
    return all_results[0]

def ldmatrix(res, ptr, num, layout, shape, elt_type, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `LdMatrixOp` and return its `.result`.

  `elt_type` is forwarded to the constructor's `eltType` argument; the other
  arguments are passed through under their own names.
  """
  op = LdMatrixOp(
      res=res,
      ptr=ptr,
      num=num,
      layout=layout,
      shape=shape,
      eltType=elt_type,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class MBarrierArriveDropExpectTxOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.arrive_drop.expect_tx` operation is similar to the
  `nvvm.mbarrier.arrive.expect_tx` operation except that it performs an
  `arrive_drop` operation instead of only an `arrive` operation.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive-drop)

  Builder notes: `addr` and `txcount` are the two operands, in that order.
  `scope` and `relaxed` are written to the attribute dictionary only when
  they are not `None`. `res` is added to the result list only when it is not
  `None`, so the op may be built with zero or one result.
  """

  OPERATION_NAME = "nvvm.mbarrier.arrive_drop.expect_tx"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, addr, txcount, *, scope=None, relaxed=None, loc=None, ip=None):
    operand_list = [addr, txcount]
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}

    if scope is not None:
      if not isinstance(scope, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('MemScopeKindAttr'):
        scope = _ods_ir.AttrBuilder.get('MemScopeKindAttr')(scope, context=ctx)
      attrs["scope"] = scope

    if relaxed is not None:
      if not isinstance(relaxed, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('BoolAttr'):
        relaxed = _ods_ir.AttrBuilder.get('BoolAttr')(relaxed, context=ctx)
      attrs["relaxed"] = relaxed

    result_list = [] if res is None else [res]
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=result_list,
        operands=operand_list,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Return the first operand of the underlying operation."""
    all_operands = self.operation.operands
    return all_operands[0]

  @builtins.property
  def txcount(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the second operand of the underlying operation."""
    all_operands = self.operation.operands
    return all_operands[1]

  @builtins.property
  def scope(self) -> _ods_ir.Attribute:
    """Return the entry stored under "scope" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["scope"]

  @scope.setter
  def scope(self, value: _ods_ir.Attribute):
    """Store `value` under "scope"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["scope"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def relaxed(self) -> _ods_ir.BoolAttr:
    """Return the entry stored under "relaxed" in the operation's attributes."""
    attrs = self.operation.attributes
    return attrs["relaxed"]

  @relaxed.setter
  def relaxed(self, value: _ods_ir.BoolAttr):
    """Store `value` under "relaxed"; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["relaxed"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _Optional[_ods_ir.OpResult[_ods_ir.IntegerType]]:
    """Return the first result, or `None` when the operation has no results."""
    all_results = self.operation.results
    if len(all_results) < 1:
      return None
    return all_results[0]

def mbarrier_arrive_drop_expect_tx(res, addr, txcount, *, scope=None, relaxed=None, loc=None, ip=None) -> _Union[_ods_ir.OpResult, _ods_ir.OpResultList, MBarrierArriveDropExpectTxOp]:
  """Build an `MBarrierArriveDropExpectTxOp` and return its most convenient handle.

  Returns the result list when the op has more than one result, the single
  result when it has exactly one, and the op itself when it has none.
  """
  created = MBarrierArriveDropExpectTxOp(
      res=res, addr=addr, txcount=txcount, scope=scope, relaxed=relaxed, loc=loc, ip=ip)
  produced = created.results
  if len(produced) > 1:
    return produced
  if len(produced) == 1:
    return produced[0]
  return created

@_ods_cext.register_operation(_Dialect)
class MBarrierArriveDropNocompleteOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.arrive_drop.nocomplete` operation decrements the expected
  arrival count of the *mbarrier object* by the amount `count` and then performs
  an arrive-on operation on the *mbarrier object* with the guarantee that it
  will not cause the barrier to complete its current phase.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive-drop)
  """

  OPERATION_NAME = "nvvm.mbarrier.arrive_drop.nocomplete"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, addr, count, *, loc=None, ip=None):
    """Create an `nvvm.mbarrier.arrive_drop.nocomplete` operation.

    Args:
      res: Sole entry of the result list handed to the `OpView` constructor.
      addr: First operand.
      count: Second operand.
      loc: Location forwarded to the `OpView` constructor.
      ip: Insertion point forwarded to the `OpView` constructor.
    """
    # Resolved like in every generated builder; this op has no attribute that consumes it.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[addr, count],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def count(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 1 of the operation."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def res(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """Result 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def mbarrier_arrive_drop_nocomplete(res, addr, count, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build an `MBarrierArriveDropNocompleteOp` and return its `result`."""
  created = MBarrierArriveDropNocompleteOp(res=res, addr=addr, count=count, loc=loc, ip=ip)
  return created.result

@_ods_cext.register_operation(_Dialect)
class MBarrierArriveDropOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.arrive_drop` operation decrements the expected arrival
  count of the *mbarrier object* by `count` and then performs an arrive-on
  operation. When `count` is not specified, it defaults to 1. The decrement
  of the expected arrival count applies to all the subsequent phases of the
  *mbarrier object*. The remaining semantics are identical to those of the
  `nvvm.mbarrier.arrive` operation.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive-drop)
  """

  OPERATION_NAME = "nvvm.mbarrier.arrive_drop"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, addr, *, count=None, scope=None, relaxed=None, loc=None, ip=None):
    """Create an `nvvm.mbarrier.arrive_drop` operation.

    Args:
      res: Added to the result list unless it is `None`.
      addr: First operand.
      count: Optional second operand; left out when `None`.
      scope: Optional `scope` attribute. A value that is not already an
        `Attribute` is passed through the 'MemScopeKindAttr' builder when one
        is registered; otherwise it is stored as given.
      relaxed: Optional `relaxed` attribute, handled the same way with the
        'BoolAttr' builder.
      loc: Location forwarded to the `OpView` constructor.
      ip: Insertion point forwarded to the `OpView` constructor.
    """
    operands = [addr]
    if count is not None:
      operands.append(count)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if scope is not None:
      build_scope = (not isinstance(scope, _ods_ir.Attribute)
                     and _ods_ir.AttrBuilder.contains('MemScopeKindAttr'))
      if build_scope:
        attributes["scope"] = _ods_ir.AttrBuilder.get('MemScopeKindAttr')(scope, context=_ods_context)
      else:
        attributes["scope"] = scope
    if relaxed is not None:
      build_relaxed = (not isinstance(relaxed, _ods_ir.Attribute)
                       and _ods_ir.AttrBuilder.contains('BoolAttr'))
      if build_relaxed:
        attributes["relaxed"] = _ods_ir.AttrBuilder.get('BoolAttr')(relaxed, context=_ods_context)
      else:
        attributes["relaxed"] = relaxed
    results = [] if res is None else [res]
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=results,
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def count(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Operand 1, or `None` when the operation has fewer than two operands."""
    if len(self.operation.operands) >= 2:
      return self.operation.operands[1]
    return None

  @builtins.property
  def scope(self) -> _ods_ir.Attribute:
    """The `scope` entry of the operation's attribute map."""
    attr_map = self.operation.attributes
    return attr_map["scope"]

  @scope.setter
  def scope(self, value: _ods_ir.Attribute):
    """Store `value` as the `scope` attribute; `None` raises `ValueError`."""
    if value is not None:
      self.operation.attributes["scope"] = value
    else:
      raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def relaxed(self) -> _ods_ir.BoolAttr:
    """The `relaxed` entry of the operation's attribute map."""
    attr_map = self.operation.attributes
    return attr_map["relaxed"]

  @relaxed.setter
  def relaxed(self, value: _ods_ir.BoolAttr):
    """Store `value` as the `relaxed` attribute; `None` raises `ValueError`."""
    if value is not None:
      self.operation.attributes["relaxed"] = value
    else:
      raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _Optional[_ods_ir.OpResult[_ods_ir.IntegerType]]:
    """Result 0, or `None` when the operation has no results."""
    if len(self.operation.results) >= 1:
      return self.operation.results[0]
    return None

def mbarrier_arrive_drop(res, addr, *, count=None, scope=None, relaxed=None, loc=None, ip=None) -> _Union[_ods_ir.OpResult, _ods_ir.OpResultList, MBarrierArriveDropOp]:
  """Build an `MBarrierArriveDropOp` and return its most convenient handle.

  Returns the result list when the op has more than one result, the single
  result when it has exactly one, and the op itself when it has none.
  """
  created = MBarrierArriveDropOp(
      res=res, addr=addr, count=count, scope=scope, relaxed=relaxed, loc=loc, ip=ip)
  produced = created.results
  if len(produced) > 1:
    return produced
  if len(produced) == 1:
    return produced[0]
  return created

@_ods_cext.register_operation(_Dialect)
class MBarrierArriveExpectTxOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.arrive.expect_tx` operation performs an expect-tx operation
  followed by an arrive-on operation on the *mbarrier object*. Uses the default
  `.release.cta` semantics. This release pattern establishes memory ordering for
  operations occurring in program order before this arrive instruction by making
  operations from the current thread visible to subsequent operations in other
  threads within the CTA. When other threads perform corresponding acquire operations
  (like 'mbarrier.test.wait'), they synchronize with this release pattern.

  This operation first performs an expect-tx operation with the specified transaction
  count, then performs an arrive-on operation with an implicit count of 1. The
  expect-tx operation increases the expect-count of the *mbarrier object* by the
  specified value (i.e. `txcount`), setting the current phase to expect and track
  the completion of additional asynchronous transactions.

  The operation takes the following operands:
  - `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
    addressing, but the address must still be in the shared memory space.
  - `txcount`: An unsigned integer specifying the expected transaction count
    for the expect-tx operation. This represents the number of asynchronous transactions
    expected to complete before the barrier phase completes.
  - `scope`: This specifies the set of threads that directly observe the memory
    synchronizing effect of the `mbarrier.arrive.expect_tx` operation.
  - `relaxed`: When set to true, the `arrive` operation has relaxed memory semantics
    and does not provide any ordering or visibility guarantees.
  - `predicate`: Optional predicate for conditional execution used only when lowering to
    inline-ptx.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
  """

  OPERATION_NAME = "nvvm.mbarrier.arrive.expect_tx"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, addr, txcount, *, scope=None, relaxed=None, predicate=None, loc=None, ip=None):
    """Create an `nvvm.mbarrier.arrive.expect_tx` operation.

    Args:
      res: Added to the result list unless it is `None`.
      addr: First operand.
      txcount: Second operand.
      scope: Optional `scope` attribute. A value that is not already an
        `Attribute` is passed through the 'MemScopeKindAttr' builder when one
        is registered; otherwise it is stored as given.
      relaxed: Optional `relaxed` attribute, handled the same way with the
        'BoolAttr' builder.
      predicate: Optional third operand; left out when `None`.
      loc: Location forwarded to the `OpView` constructor.
      ip: Insertion point forwarded to the `OpView` constructor.
    """
    operands = [addr, txcount]
    if predicate is not None:
      operands.append(predicate)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if scope is not None:
      build_scope = (not isinstance(scope, _ods_ir.Attribute)
                     and _ods_ir.AttrBuilder.contains('MemScopeKindAttr'))
      if build_scope:
        attributes["scope"] = _ods_ir.AttrBuilder.get('MemScopeKindAttr')(scope, context=_ods_context)
      else:
        attributes["scope"] = scope
    if relaxed is not None:
      build_relaxed = (not isinstance(relaxed, _ods_ir.Attribute)
                       and _ods_ir.AttrBuilder.contains('BoolAttr'))
      if build_relaxed:
        attributes["relaxed"] = _ods_ir.AttrBuilder.get('BoolAttr')(relaxed, context=_ods_context)
      else:
        attributes["relaxed"] = relaxed
    results = [] if res is None else [res]
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=results,
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def txcount(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 1 of the operation."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def predicate(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Operand 2, or `None` when the operation has fewer than three operands."""
    if len(self.operation.operands) >= 3:
      return self.operation.operands[2]
    return None

  @builtins.property
  def scope(self) -> _ods_ir.Attribute:
    """The `scope` entry of the operation's attribute map."""
    attr_map = self.operation.attributes
    return attr_map["scope"]

  @scope.setter
  def scope(self, value: _ods_ir.Attribute):
    """Store `value` as the `scope` attribute; `None` raises `ValueError`."""
    if value is not None:
      self.operation.attributes["scope"] = value
    else:
      raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def relaxed(self) -> _ods_ir.BoolAttr:
    """The `relaxed` entry of the operation's attribute map."""
    attr_map = self.operation.attributes
    return attr_map["relaxed"]

  @relaxed.setter
  def relaxed(self, value: _ods_ir.BoolAttr):
    """Store `value` as the `relaxed` attribute; `None` raises `ValueError`."""
    if value is not None:
      self.operation.attributes["relaxed"] = value
    else:
      raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _Optional[_ods_ir.OpResult[_ods_ir.IntegerType]]:
    """Result 0, or `None` when the operation has no results."""
    if len(self.operation.results) >= 1:
      return self.operation.results[0]
    return None

def mbarrier_arrive_expect_tx(res, addr, txcount, *, scope=None, relaxed=None, predicate=None, loc=None, ip=None) -> _Union[_ods_ir.OpResult, _ods_ir.OpResultList, MBarrierArriveExpectTxOp]:
  """Build an `MBarrierArriveExpectTxOp` and return its most convenient handle.

  Returns the result list when the op has more than one result, the single
  result when it has exactly one, and the op itself when it has none.
  """
  created = MBarrierArriveExpectTxOp(
      res=res, addr=addr, txcount=txcount, scope=scope, relaxed=relaxed,
      predicate=predicate, loc=loc, ip=ip)
  produced = created.results
  if len(produced) > 1:
    return produced
  if len(produced) == 1:
    return produced[0]
  return created

@_ods_cext.register_operation(_Dialect)
class MBarrierArriveNocompleteOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.arrive.nocomplete` operation performs an arrive-on operation
  on the *mbarrier object* with the guarantee that it will not cause the barrier to
  complete its current phase. Uses the default `.release.cta` semantics. This release
  pattern establishes memory ordering for operations occurring in program order before
  this arrive instruction by making operations from the current thread visible to
  subsequent operations in other threads within the CTA. When other threads perform
  corresponding acquire operations (like 'mbarrier.test.wait'), they synchronize with
  this release pattern.

  This operation causes the executing thread to signal its arrival at the barrier
  with a specified count, but ensures that the barrier phase will not complete as
  a result of this operation. The operation returns an opaque value that
  captures the phase of the *mbarrier object* prior to the arrive-on operation.

  The operation takes the following operands:
  - `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
    must be a pointer to generic or shared::cta memory. When it is generic, the
    underlying address must be within the shared::cta memory space; otherwise
    the behavior is undefined.
  - `count`: Integer specifying the count argument to the arrive-on operation.
    Must be in the valid range as specified in the *mbarrier object* contents.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
  """

  OPERATION_NAME = "nvvm.mbarrier.arrive.nocomplete"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, addr, count, *, loc=None, ip=None):
    """Create an `nvvm.mbarrier.arrive.nocomplete` operation.

    Args:
      res: Sole entry of the result list handed to the `OpView` constructor.
      addr: First operand.
      count: Second operand.
      loc: Location forwarded to the `OpView` constructor.
      ip: Insertion point forwarded to the `OpView` constructor.
    """
    # Resolved like in every generated builder; this op has no attribute that consumes it.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=[addr, count],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def count(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 1 of the operation."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def res(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """Result 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def mbarrier_arrive_nocomplete(res, addr, count, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build an `MBarrierArriveNocompleteOp` and return its `result`."""
  created = MBarrierArriveNocompleteOp(res=res, addr=addr, count=count, loc=loc, ip=ip)
  return created.result

@_ods_cext.register_operation(_Dialect)
class MBarrierArriveOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.arrive` operation performs an arrive-on operation on the
  *mbarrier object* at the specified address. Uses the default `.release.cta` semantics.
  This release pattern establishes memory ordering for operations occurring in program
  order before this arrive instruction by making operations from the current thread
  visible to subsequent operations in other threads within the CTA. When other threads
  perform corresponding acquire operations (like 'mbarrier.test.wait'), they synchronize
  with this release pattern.

  This operation causes the executing thread to signal its arrival at the barrier.

  - `res`: When the `space` is not shared_cluster, this operation returns an
    opaque 64-bit value capturing the phase of the *mbarrier object* prior to
    the arrive-on operation. The contents of this return value are
    implementation-specific. An *mbarrier object* located in the shared_cluster
    space cannot return a value.

  The operation takes the following operands:
  - `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
    must be a pointer to generic or shared_cta or shared_cluster memory. When it
    is generic, the underlying address must be within the shared_cta memory space;
    otherwise the behavior is undefined.
  - `count`: This specifies the amount by which the pending arrival count is
    decremented. If the `count` argument is not specified, the pending arrival
    count is decremented by 1.
  - `scope`: This specifies the set of threads that directly observe the memory
    synchronizing effect of the `mbarrier.arrive` operation.
  - `space`: This indicates the memory space where the mbarrier object resides.
  - `relaxed`: When set to true, the `arrive` operation has relaxed memory semantics
    and does not provide any ordering or visibility guarantees.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
  """

  OPERATION_NAME = "nvvm.mbarrier.arrive"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, addr, *, count=None, scope=None, relaxed=None, loc=None, ip=None):
    """Create an `nvvm.mbarrier.arrive` operation.

    Args:
      res: Added to the result list unless it is `None`.
      addr: First operand.
      count: Optional second operand; left out when `None`.
      scope: Optional `scope` attribute. A value that is not already an
        `Attribute` is passed through the 'MemScopeKindAttr' builder when one
        is registered; otherwise it is stored as given.
      relaxed: Optional `relaxed` attribute, handled the same way with the
        'BoolAttr' builder.
      loc: Location forwarded to the `OpView` constructor.
      ip: Insertion point forwarded to the `OpView` constructor.
    """
    operands = [addr]
    if count is not None:
      operands.append(count)
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if scope is not None:
      build_scope = (not isinstance(scope, _ods_ir.Attribute)
                     and _ods_ir.AttrBuilder.contains('MemScopeKindAttr'))
      if build_scope:
        attributes["scope"] = _ods_ir.AttrBuilder.get('MemScopeKindAttr')(scope, context=_ods_context)
      else:
        attributes["scope"] = scope
    if relaxed is not None:
      build_relaxed = (not isinstance(relaxed, _ods_ir.Attribute)
                       and _ods_ir.AttrBuilder.contains('BoolAttr'))
      if build_relaxed:
        attributes["relaxed"] = _ods_ir.AttrBuilder.get('BoolAttr')(relaxed, context=_ods_context)
      else:
        attributes["relaxed"] = relaxed
    results = [] if res is None else [res]
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=results,
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def count(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Operand 1, or `None` when the operation has fewer than two operands."""
    if len(self.operation.operands) >= 2:
      return self.operation.operands[1]
    return None

  @builtins.property
  def scope(self) -> _ods_ir.Attribute:
    """The `scope` entry of the operation's attribute map."""
    attr_map = self.operation.attributes
    return attr_map["scope"]

  @scope.setter
  def scope(self, value: _ods_ir.Attribute):
    """Store `value` as the `scope` attribute; `None` raises `ValueError`."""
    if value is not None:
      self.operation.attributes["scope"] = value
    else:
      raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def relaxed(self) -> _ods_ir.BoolAttr:
    """The `relaxed` entry of the operation's attribute map."""
    attr_map = self.operation.attributes
    return attr_map["relaxed"]

  @relaxed.setter
  def relaxed(self, value: _ods_ir.BoolAttr):
    """Store `value` as the `relaxed` attribute; `None` raises `ValueError`."""
    if value is not None:
      self.operation.attributes["relaxed"] = value
    else:
      raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _Optional[_ods_ir.OpResult[_ods_ir.IntegerType]]:
    """Result 0, or `None` when the operation has no results."""
    if len(self.operation.results) >= 1:
      return self.operation.results[0]
    return None

def mbarrier_arrive(res, addr, *, count=None, scope=None, relaxed=None, loc=None, ip=None) -> _Union[_ods_ir.OpResult, _ods_ir.OpResultList, MBarrierArriveOp]:
  """Build an `MBarrierArriveOp` and return its most convenient handle.

  Returns the result list when the op has more than one result, the single
  result when it has exactly one, and the op itself when it has none.
  """
  created = MBarrierArriveOp(
      res=res, addr=addr, count=count, scope=scope, relaxed=relaxed, loc=loc, ip=ip)
  produced = created.results
  if len(produced) > 1:
    return produced
  if len(produced) == 1:
    return produced[0]
  return created

@_ods_cext.register_operation(_Dialect)
class MBarrierCompleteTxOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.complete_tx` operation decrements the transaction
  count of the *mbarrier object* at `addr` by `txcount`. It also signals
  the completion of asynchronous transactions that were tracked by the
  current phase. The `scope` specifies the set of threads that can directly
  observe the memory synchronizing effect of the `mbarrier.complete_tx`
  operation. `CTA` and `CLUSTER` are the only allowed values for `scope`.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-complete-tx)
  """

  OPERATION_NAME = "nvvm.mbarrier.complete_tx"

  _ODS_REGIONS = (0, True)

  def __init__(self, addr, txcount, *, scope=None, loc=None, ip=None):
    """Create an `nvvm.mbarrier.complete_tx` operation (no results).

    Args:
      addr: First operand.
      txcount: Second operand.
      scope: Optional `scope` attribute. A value that is not already an
        `Attribute` is passed through the 'MemScopeKindAttr' builder when one
        is registered; otherwise it is stored as given.
      loc: Location forwarded to the `OpView` constructor.
      ip: Insertion point forwarded to the `OpView` constructor.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if scope is not None:
      build_scope = (not isinstance(scope, _ods_ir.Attribute)
                     and _ods_ir.AttrBuilder.contains('MemScopeKindAttr'))
      if build_scope:
        attributes["scope"] = _ods_ir.AttrBuilder.get('MemScopeKindAttr')(scope, context=_ods_context)
      else:
        attributes["scope"] = scope
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=[addr, txcount],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def txcount(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 1 of the operation."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def scope(self) -> _ods_ir.Attribute:
    """The `scope` entry of the operation's attribute map."""
    attr_map = self.operation.attributes
    return attr_map["scope"]

  @scope.setter
  def scope(self, value: _ods_ir.Attribute):
    """Store `value` as the `scope` attribute; `None` raises `ValueError`."""
    if value is not None:
      self.operation.attributes["scope"] = value
    else:
      raise ValueError("'None' not allowed as value for mandatory attributes")

def mbarrier_complete_tx(addr, txcount, *, scope=None, loc=None, ip=None) -> MBarrierCompleteTxOp:
  """Build and return an `MBarrierCompleteTxOp` from the given arguments."""
  created = MBarrierCompleteTxOp(addr=addr, txcount=txcount, scope=scope, loc=loc, ip=ip)
  return created

@_ods_cext.register_operation(_Dialect)
class MBarrierExpectTxOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.expect_tx` operation increases the transaction count
  of the mbarrier located at `addr` by `txcount` amount. The `scope`
  specifies the set of threads that can directly observe the memory
  synchronizing effect of the `mbarrier.expect_tx` operation. `CTA`
  and `CLUSTER` are the only allowed values for `scope`.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-expect-tx)
  """

  OPERATION_NAME = "nvvm.mbarrier.expect_tx"

  _ODS_REGIONS = (0, True)

  def __init__(self, addr, txcount, *, scope=None, loc=None, ip=None):
    """Create an `nvvm.mbarrier.expect_tx` operation (no results).

    Args:
      addr: First operand.
      txcount: Second operand.
      scope: Optional `scope` attribute. A value that is not already an
        `Attribute` is passed through the 'MemScopeKindAttr' builder when one
        is registered; otherwise it is stored as given.
      loc: Location forwarded to the `OpView` constructor.
      ip: Insertion point forwarded to the `OpView` constructor.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if scope is not None:
      build_scope = (not isinstance(scope, _ods_ir.Attribute)
                     and _ods_ir.AttrBuilder.contains('MemScopeKindAttr'))
      if build_scope:
        attributes["scope"] = _ods_ir.AttrBuilder.get('MemScopeKindAttr')(scope, context=_ods_context)
      else:
        attributes["scope"] = scope
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=[addr, txcount],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def txcount(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 1 of the operation."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def scope(self) -> _ods_ir.Attribute:
    """The `scope` entry of the operation's attribute map."""
    attr_map = self.operation.attributes
    return attr_map["scope"]

  @scope.setter
  def scope(self, value: _ods_ir.Attribute):
    """Store `value` as the `scope` attribute; `None` raises `ValueError`."""
    if value is not None:
      self.operation.attributes["scope"] = value
    else:
      raise ValueError("'None' not allowed as value for mandatory attributes")

def mbarrier_expect_tx(addr, txcount, *, scope=None, loc=None, ip=None) -> MBarrierExpectTxOp:
  """Build and return an `MBarrierExpectTxOp` from the given arguments."""
  created = MBarrierExpectTxOp(addr=addr, txcount=txcount, scope=scope, loc=loc, ip=ip)
  return created

@_ods_cext.register_operation(_Dialect)
class MBarrierInitOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.init` operation initializes an *mbarrier object* at the specified
  memory location.

  This operation initializes the *mbarrier object* with the following state:
  - Current phase: 0
  - Expected arrival count: `count`
  - Pending arrival count: `count`
  - Transaction count (tx-count): 0

  The operation takes the following operands:
  - `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
    must be a pointer to generic or shared::cta memory. When it is generic, the
    underlying address must be within the shared::cta memory space; otherwise
    the behavior is undefined.
  - `count`: Integer specifying the number of threads that will participate in barrier
    synchronization. Must be in the range [1, 2²⁰ - 1].
  - `predicate`: Optional predicate for conditional execution.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-init)
  """

  OPERATION_NAME = "nvvm.mbarrier.init"

  _ODS_REGIONS = (0, True)

  def __init__(self, addr, count, *, predicate=None, loc=None, ip=None):
    """Create an `nvvm.mbarrier.init` operation (no results, no attributes).

    Args:
      addr: First operand.
      count: Second operand.
      predicate: Optional third operand; left out when `None`.
      loc: Location forwarded to the `OpView` constructor.
      ip: Insertion point forwarded to the `OpView` constructor.
    """
    operands = [addr, count]
    if predicate is not None:
      operands.append(predicate)
    # Resolved like in every generated builder; this op has no attribute that consumes it.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def count(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 1 of the operation."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def predicate(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Operand 2, or `None` when the operation has fewer than three operands."""
    if len(self.operation.operands) >= 3:
      return self.operation.operands[2]
    return None

def mbarrier_init(addr, count, *, predicate=None, loc=None, ip=None) -> MBarrierInitOp:
  """Build and return an `MBarrierInitOp` from the given arguments."""
  created = MBarrierInitOp(addr=addr, count=count, predicate=predicate, loc=loc, ip=ip)
  return created

@_ods_cext.register_operation(_Dialect)
class MBarrierInvalOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.inval` operation invalidates an *mbarrier object* at the
  specified memory location.

  This operation marks the *mbarrier object* as invalid, making it safe to repurpose
  the memory location for other uses or to reinitialize it as a new *mbarrier object*.
  It is undefined behavior if the *mbarrier object* is already invalid.

  The operation takes the following operand:
  - `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
    must be a pointer to generic or shared::cta memory. When it is generic, the
    underlying address must be within the shared::cta memory space; otherwise
    the behavior is undefined.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-inval)
  """

  OPERATION_NAME = "nvvm.mbarrier.inval"

  _ODS_REGIONS = (0, True)

  def __init__(self, addr, *, loc=None, ip=None):
    """Create an `nvvm.mbarrier.inval` operation (no results, no attributes).

    Args:
      addr: The only operand.
      loc: Location forwarded to the `OpView` constructor.
      ip: Insertion point forwarded to the `OpView` constructor.
    """
    # Resolved like in every generated builder; this op has no attribute that consumes it.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=[addr],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

def mbarrier_inval(addr, *, loc=None, ip=None) -> MBarrierInvalOp:
  """Build and return an `MBarrierInvalOp` for `addr`."""
  created = MBarrierInvalOp(addr=addr, loc=loc, ip=ip)
  return created

@_ods_cext.register_operation(_Dialect)
class MBarrierTestWaitOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.test.wait` operation performs a non-blocking test for the
  completion of a specific phase of an *mbarrier object*. It uses the default
  `.acquire.cta` semantics. This acquire pattern establishes memory ordering for
  operations occurring in program order after this wait instruction by making
  operations from other threads in the CTA visible to subsequent operations in the current
  thread. When this wait completes, it synchronizes with the corresponding release
  pattern from the `mbarrier.arrive` operation, establishing memory ordering within
  the CTA.

  This operation tests whether the mbarrier phase specified by the state operand
  has completed. It is a non-blocking instruction that immediately returns the
  completion status without suspending the executing thread.

  The operation takes the following operands:
  - `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
    addressing, but the address must still be in the shared memory space.
  - `stateOrPhase`: This argument represents a `state` when it is a 64-bit value
    and represents a `phase` when it is a 32-bit value. The `state` is an opaque
    value returned by a previous `mbarrier.arrive` operation on the same
    *mbarrier object* during the current or immediately preceding phase.
    The `phase` is an integer specifying the phase parity (0 or 1).
    Even phases have parity 0, odd phases have parity 1.
  - `scope`: This specifies the set of threads that directly observe the memory
    synchronizing effect of the `mbarrier.test.wait` operation.
  - `relaxed`: When set to true, the `test.wait` operation has relaxed memory semantics
    and does not provide any ordering or visibility guarantees.

  The operation returns a boolean value indicating whether the specified phase
  has completed:
  - `true`: The immediately preceding phase has completed
  - `false`: The phase is still incomplete (current phase)

  **Memory ordering guarantees**: When this wait returns true, the following
  ordering guarantees hold:

  1. All memory accesses (except async operations) requested prior to
     `mbarrier.arrive` having release semantics by participating CTA threads
     are visible to the executing thread.
  2. All `cp.async` operations requested prior to `cp.async.mbarrier.arrive`
     by participating CTA threads are visible to the executing thread.
  3. All `cp.async.bulk` operations using the same *mbarrier object* requested
     prior to `mbarrier.arrive` having release semantics by participating CTA
     threads are visible to the executing thread.
  4. Memory accesses requested after this wait are not visible to memory
     accesses performed prior to `mbarrier.arrive` by other participating
     threads.
  5. No ordering guarantee exists for memory accesses by the same thread
     between `mbarrier.arrive` and this wait.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
  """

  OPERATION_NAME = "nvvm.mbarrier.test.wait"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, addr, stateOrPhase, *, scope=None, relaxed=None, loc=None, ip=None):
    """Create an `nvvm.mbarrier.test.wait` operation.

    Args:
      res: Sole entry of the result list handed to the `OpView` constructor.
      addr: First operand.
      stateOrPhase: Second operand.
      scope: Optional `scope` attribute. A value that is not already an
        `Attribute` is passed through the 'MemScopeKindAttr' builder when one
        is registered; otherwise it is stored as given.
      relaxed: Optional `relaxed` attribute, handled the same way with the
        'BoolAttr' builder.
      loc: Location forwarded to the `OpView` constructor.
      ip: Insertion point forwarded to the `OpView` constructor.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}
    if scope is not None:
      build_scope = (not isinstance(scope, _ods_ir.Attribute)
                     and _ods_ir.AttrBuilder.contains('MemScopeKindAttr'))
      if build_scope:
        attributes["scope"] = _ods_ir.AttrBuilder.get('MemScopeKindAttr')(scope, context=_ods_context)
      else:
        attributes["scope"] = scope
    if relaxed is not None:
      build_relaxed = (not isinstance(relaxed, _ods_ir.Attribute)
                       and _ods_ir.AttrBuilder.contains('BoolAttr'))
      if build_relaxed:
        attributes["relaxed"] = _ods_ir.AttrBuilder.get('BoolAttr')(relaxed, context=_ods_context)
      else:
        attributes["relaxed"] = relaxed
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=[addr, stateOrPhase],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def stateOrPhase(self) -> _ods_ir.Value:
    """Operand 1 of the operation."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def scope(self) -> _ods_ir.Attribute:
    """The `scope` entry of the operation's attribute map."""
    attr_map = self.operation.attributes
    return attr_map["scope"]

  @scope.setter
  def scope(self, value: _ods_ir.Attribute):
    """Store `value` as the `scope` attribute; `None` raises `ValueError`."""
    if value is not None:
      self.operation.attributes["scope"] = value
    else:
      raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def relaxed(self) -> _ods_ir.BoolAttr:
    """The `relaxed` entry of the operation's attribute map."""
    attr_map = self.operation.attributes
    return attr_map["relaxed"]

  @relaxed.setter
  def relaxed(self, value: _ods_ir.BoolAttr):
    """Store `value` as the `relaxed` attribute; `None` raises `ValueError`."""
    if value is not None:
      self.operation.attributes["relaxed"] = value
    else:
      raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """Result 0 of the operation."""
    op_results = self.operation.results
    return op_results[0]

def mbarrier_test_wait(res, addr, state_or_phase, *, scope=None, relaxed=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Build an `MBarrierTestWaitOp` and return its `result`.

  `state_or_phase` is passed to the op constructor as `stateOrPhase`.
  """
  created = MBarrierTestWaitOp(
      res=res, addr=addr, stateOrPhase=state_or_phase, scope=scope, relaxed=relaxed,
      loc=loc, ip=ip)
  return created.result

@_ods_cext.register_operation(_Dialect)
class MBarrierTryWaitOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.try_wait` operation checks whether the specified
  *mbarrier object* at `addr` has completed the given phase. Note that
  unlike the `nvvm.mbarrier.test.wait` operation, the try_wait operation
  is a potentially-blocking one. If the phase is not yet complete, the
  calling thread may be suspended. A suspended thread resumes execution
  once the phase completes or when a system-defined timeout occurs.
  Optionally, the `ticks` operand can be used to provide a custom timeout
  (in nanoseconds), overriding the system-defined one. The semantics of
  this operation and its operands are otherwise similar to those of the
  `nvvm.mbarrier.test.wait` Op.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
  """

  OPERATION_NAME = "nvvm.mbarrier.try_wait"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, addr, stateOrPhase, *, ticks=None, scope=None, relaxed=None, loc=None, ip=None):
    """Build an `nvvm.mbarrier.try_wait` operation.

    Args:
      res: Sole entry of the `results` list handed to the base constructor
        (presumably the result type -- confirm against `OpView`).
      addr: Operand 0.
      stateOrPhase: Operand 1.
      ticks: Optional operand 2; omitted from the operand list when None.
      scope: Optional `scope` attribute; only set when not None.
      relaxed: Optional `relaxed` attribute; only set when not None.
      loc: Location forwarded to the base constructor and used to resolve
        the context for attribute building.
      ip: Insertion point forwarded to the base constructor.
    """
    operand_list = [addr, stateOrPhase]
    if ticks is not None:
      operand_list.append(ticks)
    _ods_context = _ods_get_default_loc_context(loc)

    def _coerce(raw, builder_name):
      # Ready-made attributes pass through untouched; so do raw values when
      # no builder is registered under `builder_name`.
      if isinstance(raw, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains(builder_name):
        return raw
      return _ods_ir.AttrBuilder.get(builder_name)(raw, context=_ods_context)

    attr_map = {}
    if scope is not None:
      attr_map["scope"] = _coerce(scope, 'MemScopeKindAttr')
    if relaxed is not None:
      attr_map["relaxed"] = _coerce(relaxed, 'BoolAttr')
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attr_map,
        results=[res],
        operands=operand_list,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Return the `addr` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def stateOrPhase(self) -> _ods_ir.Value:
    """Return the `stateOrPhase` operand (operand 1)."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def ticks(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the `ticks` operand (operand 2), or None if fewer than three operands exist."""
    operand_list = self.operation.operands
    if len(operand_list) < 3:
      return None
    return operand_list[2]

  @builtins.property
  def scope(self) -> _ods_ir.Attribute:
    """Return the `scope` attribute of the operation."""
    attr_dict = self.operation.attributes
    return attr_dict["scope"]

  @scope.setter
  def scope(self, value: _ods_ir.Attribute):
    """Replace the `scope` attribute; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["scope"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def relaxed(self) -> _ods_ir.BoolAttr:
    """Return the `relaxed` attribute of the operation."""
    attr_dict = self.operation.attributes
    return attr_dict["relaxed"]

  @relaxed.setter
  def relaxed(self, value: _ods_ir.BoolAttr):
    """Replace the `relaxed` attribute; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["relaxed"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """Return the `res` result (result 0)."""
    result_list = self.operation.results
    return result_list[0]

def mbarrier_try_wait(res, addr, state_or_phase, *, ticks=None, scope=None, relaxed=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `MBarrierTryWaitOp` and return its `result`.

  `state_or_phase` is forwarded as the op's `stateOrPhase` argument; every
  other argument is forwarded under its own name.
  """
  op = MBarrierTryWaitOp(
      res=res,
      addr=addr,
      stateOrPhase=state_or_phase,
      ticks=ticks,
      scope=scope,
      relaxed=relaxed,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class MBarrierTryWaitParityOp(_ods_ir.OpView):
  r"""
  The `nvvm.mbarrier.try_wait.parity` operation performs a potentially-blocking
  test for the completion of a specific phase of an *mbarrier object* using phase
  parity. It uses the default `.acquire.cta` semantics. This acquire pattern
  establishes memory ordering for operations occurring in program order after this
  wait instruction by making operations from other threads in the CTA visible to subsequent
  operations in the current thread. When this wait completes, it synchronizes with
  the corresponding release pattern from the `mbarrier.arrive` operation, establishing
  memory ordering within the CTA.

  This operation waits for the completion of the mbarrier phase indicated by the
  phase parity. While it uses the underlying PTX `mbarrier.try_wait.parity`
  instruction, this MLIR operation generates a loop that enforces the test to
  complete before continuing execution, ensuring the barrier phase is actually
  completed rather than potentially timing out.

  The operation takes the following operands:
  - `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
    addressing, but the address must still be in the shared memory space.
  - `phase`: An integer specifying the phase parity (0 or 1). Even phases
    have parity 0, odd phases have parity 1.
  - `ticks`: An unsigned integer specifying the suspend time hint in
    nanoseconds. This may be used instead of the system-dependent time limit.

  **Memory ordering guarantees**: When this wait returns true, the following
  ordering guarantees hold:

  1. All memory accesses (except async operations) requested prior to
     `mbarrier.arrive` having release semantics by participating CTA threads
     are visible to the executing thread.
  2. All `cp.async` operations requested prior to `cp.async.mbarrier.arrive`
     by participating CTA threads are visible to the executing thread.
  3. All `cp.async.bulk` operations using the same *mbarrier object* requested
     prior to `mbarrier.arrive` having release semantics by participating CTA
     threads are visible to the executing thread.
  4. Memory accesses requested after this wait are not visible to memory
     accesses performed prior to `mbarrier.arrive` by other participating
     threads.
  5. No ordering guarantee exists for memory accesses by the same thread
     between `mbarrier.arrive` and this wait.

  **Implementation behavior**:
  This operation generates a PTX loop that repeatedly calls the underlying
  `mbarrier.try_wait.parity` instruction until the barrier phase completes.
  Unlike the raw PTX instruction which may return without completion after a
  timeout, this MLIR operation guarantees completion by continuing to loop until
  the specified phase is reached.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
  """

  OPERATION_NAME = "nvvm.mbarrier.try_wait.parity"

  _ODS_REGIONS = (0, True)

  def __init__(self, addr, phase, ticks, *, loc=None, ip=None):
    """Build an `nvvm.mbarrier.try_wait.parity` operation.

    Args:
      addr: Operand 0.
      phase: Operand 1.
      ticks: Operand 2.
      loc: Location forwarded to the base constructor.
      ip: Insertion point forwarded to the base constructor.
    """
    operand_list = [addr, phase, ticks]
    # This op builds no attributes, so the resolved context is not consumed;
    # the lookup is still performed so construction behaves exactly as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=operand_list,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Return the `addr` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def phase(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the `phase` operand (operand 1)."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def ticks(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the `ticks` operand (operand 2)."""
    operand_list = self.operation.operands
    return operand_list[2]

def mbarrier_try_wait_parity(addr, phase, ticks, *, loc=None, ip=None) -> MBarrierTryWaitParityOp:
  """Construct and return a `MBarrierTryWaitParityOp` from the given arguments."""
  op = MBarrierTryWaitParityOp(
      addr=addr,
      phase=phase,
      ticks=ticks,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class MapaOp(_ods_ir.OpView):
  """Python view of the `nvvm.mapa` operation.

  The op is built from two operands (`a`, `b`) and exposes one result (`res`).
  """

  OPERATION_NAME = "nvvm.mapa"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, a, b, *, loc=None, ip=None):
    """Build an `nvvm.mapa` operation.

    Args:
      res: Sole entry of the `results` list handed to the base constructor
        (presumably the result type -- confirm against `OpView`).
      a: Operand 0.
      b: Operand 1.
      loc: Location forwarded to the base constructor.
      ip: Insertion point forwarded to the base constructor.
    """
    operand_list = [a, b]
    # This op builds no attributes, so the resolved context is not consumed;
    # the lookup is still performed so construction behaves exactly as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=operand_list,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def a(self) -> _ods_ir.Value:
    """Return the `a` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def b(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the `b` operand (operand 1)."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the `res` result (result 0)."""
    result_list = self.operation.results
    return result_list[0]

def mapa(res, a, b, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `MapaOp` from the given arguments and return its `result`."""
  op = MapaOp(res=res, a=a, b=b, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class MatchSyncOp(_ods_ir.OpView):
  r"""
  The `match.sync` op performs broadcast and compare of operand `val` across
  all non-exited threads in `thread_mask` and returns a mask depending on the
  kind and an optional predicate.

  The matching operation kinds are:
  - `any`: Returns a mask corresponding to the non-exited threads in the
  `thread_mask` that have the same value of operand `val`.
  - `all`: Returns a mask and a predicate. If all non-exited threads in the
  `thread_mask` have the same value of operand `val`, the predicate is set to
  true and the mask corresponds to the non-exited threads in the
  `thread_mask`. Otherwise, the predicate is set to false and the mask is 0.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-match-sync)
  """

  OPERATION_NAME = "nvvm.match.sync"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, thread_mask, val, kind, *, loc=None, ip=None):
    """Build an `nvvm.match.sync` operation.

    Args:
      res: Sole entry of the `results` list handed to the base constructor
        (presumably the result type -- confirm against `OpView`).
      thread_mask: Operand 0.
      val: Operand 1.
      kind: Value for the `kind` attribute. Used as-is when it already is an
        attribute or when no 'MatchSyncKindAttr' builder is registered;
        otherwise converted through that builder.
      loc: Location forwarded to the base constructor and used to resolve
        the context for attribute building.
      ip: Insertion point forwarded to the base constructor.
    """
    operand_list = [thread_mask, val]
    _ods_context = _ods_get_default_loc_context(loc)
    if isinstance(kind, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('MatchSyncKindAttr'):
      kind_attr = kind
    else:
      kind_attr = _ods_ir.AttrBuilder.get('MatchSyncKindAttr')(kind, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={"kind": kind_attr},
        results=[res],
        operands=operand_list,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def thread_mask(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the `thread_mask` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def val(self) -> _ods_ir.Value:
    """Return the `val` operand (operand 1)."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Return the `kind` attribute of the operation."""
    attr_dict = self.operation.attributes
    return attr_dict["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Replace the `kind` attribute; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["kind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the `res` result (result 0)."""
    result_list = self.operation.results
    return result_list[0]

def match_sync(res, thread_mask, val, kind, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `MatchSyncOp` from the given arguments and return its `result`."""
  op = MatchSyncOp(
      res=res,
      thread_mask=thread_mask,
      val=val,
      kind=kind,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class MembarOp(_ods_ir.OpView):
  r"""
  `membar` operation guarantees that prior memory accesses requested by this
  thread are performed at the specified `scope`, before later memory
  operations requested by this thread following the membar instruction.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-membar)
  """

  OPERATION_NAME = "nvvm.memory.barrier"

  _ODS_REGIONS = (0, True)

  def __init__(self, scope, *, loc=None, ip=None):
    """Build an `nvvm.memory.barrier` operation.

    Args:
      scope: Value for the `scope` attribute. Used as-is when it already is
        an attribute or when no 'MemScopeKindAttr' builder is registered;
        otherwise converted through that builder.
      loc: Location forwarded to the base constructor and used to resolve
        the context for attribute building.
      ip: Insertion point forwarded to the base constructor.
    """
    _ods_context = _ods_get_default_loc_context(loc)
    if isinstance(scope, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('MemScopeKindAttr'):
      scope_attr = scope
    else:
      scope_attr = _ods_ir.AttrBuilder.get('MemScopeKindAttr')(scope, context=_ods_context)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={"scope": scope_attr},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def scope(self) -> _ods_ir.Attribute:
    """Return the `scope` attribute of the operation."""
    attr_dict = self.operation.attributes
    return attr_dict["scope"]

  @scope.setter
  def scope(self, value: _ods_ir.Attribute):
    """Replace the `scope` attribute; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["scope"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def memory_barrier(scope, *, loc=None, ip=None) -> MembarOp:
  """Construct and return a `MembarOp` with the given `scope`."""
  op = MembarOp(scope=scope, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class MmaBlockScaleOp(_ods_ir.OpView):
  r"""
  The `nvvm.mma.block_scale` operation collectively performs the operation
  `D = matmul(A * SF_A, B * SF_B) + C` using all threads in a warp.

  A, B, C and D are dense matrices and SF_A and SF_B are scaling factors.
  Dimensions of SF_A and SF_B are based on scale vector sizes (x1, x2, x4),
  and the data type must be either ue8m0 or ue4m3.

  All the threads in the warp must execute the same `mma.block_scale` operation.

  This operation follows the same design pattern as `nvvm.mma.sync`, with additional
  scaling operands for both A and B matrices.

  Example:
  ```mlir
  %d = nvvm.mma.block_scale A[%a0, %a1] B[%b0, %b1] C[%c0, %c1]
                            scaleA[%scaleAData, %byteIdA, %threadIdA]
                            scaleB[%scaleBData, %byteIdB, %threadIdB]
                            {shape = #nvvm.shape<m = 16, n = 8, k = 64>,
                             multiplicandAPtxType = #nvvm.mma_type<e2m1>,
                             multiplicandBPtxType = #nvvm.mma_type<e2m1>,
                             scaleVecSize = #nvvm.scale_vec_size<x2>,
                             blockScaleFormat = #nvvm.block_scale_format<ue8m0>,
                             kind = #nvvm.block_scale_kind<mxf4nvf4>}
      : (vector<4xf16>, vector<2xf16>, vector<2xf32>) -> !llvm.struct<(f32, f32)>
  ```
  """

  OPERATION_NAME = "nvvm.mma.block_scale"

  _ODS_OPERAND_SEGMENTS = [-1,-1,-1,1,1,1,1,1,1,]

  _ODS_REGIONS = (0, True)

  def __init__(self, res, shape, scaleVecSize, blockScaleFormat, kind, operandA, operandB, operandC, scaleAData, byteIdA, threadIdA, scaleBData, byteIdB, threadIdB, *, multiplicandAPtxType=None, multiplicandBPtxType=None, loc=None, ip=None):
    """Build an `nvvm.mma.block_scale` operation.

    Args:
      res: Sole entry of the `results` list handed to the base constructor
        (presumably the result type -- confirm against `OpView`).
      shape, scaleVecSize, blockScaleFormat, kind: Values for the attributes
        of the same names; always set.
      operandA, operandB, operandC: Operand groups 0-2; each is normalized
        with `_get_op_results_or_values` before being stored.
      scaleAData, byteIdA, threadIdA, scaleBData, byteIdB, threadIdB:
        Operand groups 3-8, stored as given.
      multiplicandAPtxType, multiplicandBPtxType: Optional attributes; only
        set when not None.
      loc: Location forwarded to the base constructor and used to resolve
        the context for attribute building.
      ip: Insertion point forwarded to the base constructor.
    """
    operand_groups = [
        _get_op_results_or_values(operandA),
        _get_op_results_or_values(operandB),
        _get_op_results_or_values(operandC),
        scaleAData,
        byteIdA,
        threadIdA,
        scaleBData,
        byteIdB,
        threadIdB,
    ]
    _ods_context = _ods_get_default_loc_context(loc)

    def _coerce(raw, builder_name):
      # Ready-made attributes pass through untouched; so do raw values when
      # no builder is registered under `builder_name`.
      if isinstance(raw, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains(builder_name):
        return raw
      return _ods_ir.AttrBuilder.get(builder_name)(raw, context=_ods_context)

    attr_map = {}
    attr_map["shape"] = _coerce(shape, 'NVVM_MMAShapeAttr')
    if multiplicandAPtxType is not None:
      attr_map["multiplicandAPtxType"] = _coerce(multiplicandAPtxType, 'MMATypesAttr')
    if multiplicandBPtxType is not None:
      attr_map["multiplicandBPtxType"] = _coerce(multiplicandBPtxType, 'MMATypesAttr')
    attr_map["scaleVecSize"] = _coerce(scaleVecSize, 'ScaleVecSizeAttr')
    attr_map["blockScaleFormat"] = _coerce(blockScaleFormat, 'BlockScaleFormatAttr')
    attr_map["kind"] = _coerce(kind, 'MMABlockScaleKindAttr')
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attr_map,
        results=[res],
        operands=operand_groups,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def operandA(self) -> _ods_ir.OpOperandList:
    """Return operand segment 0 (`operandA`) as selected by `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 0)

  @builtins.property
  def operandB(self) -> _ods_ir.OpOperandList:
    """Return operand segment 1 (`operandB`) as selected by `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 1)

  @builtins.property
  def operandC(self) -> _ods_ir.OpOperandList:
    """Return operand segment 2 (`operandC`) as selected by `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 2)

  @builtins.property
  def scaleAData(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the first value of operand segment 3 (`scaleAData`)."""
    op = self.operation
    segment = _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 3)
    return segment[0]

  @builtins.property
  def byteIdA(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the first value of operand segment 4 (`byteIdA`)."""
    op = self.operation
    segment = _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 4)
    return segment[0]

  @builtins.property
  def threadIdA(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the first value of operand segment 5 (`threadIdA`)."""
    op = self.operation
    segment = _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 5)
    return segment[0]

  @builtins.property
  def scaleBData(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the first value of operand segment 6 (`scaleBData`)."""
    op = self.operation
    segment = _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 6)
    return segment[0]

  @builtins.property
  def byteIdB(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the first value of operand segment 7 (`byteIdB`)."""
    op = self.operation
    segment = _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 7)
    return segment[0]

  @builtins.property
  def threadIdB(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the first value of operand segment 8 (`threadIdB`)."""
    op = self.operation
    segment = _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 8)
    return segment[0]

  @builtins.property
  def shape(self) -> _ods_ir.Attribute:
    """Return the `shape` attribute of the operation."""
    attr_dict = self.operation.attributes
    return attr_dict["shape"]

  @shape.setter
  def shape(self, value: _ods_ir.Attribute):
    """Replace the `shape` attribute; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["shape"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def multiplicandAPtxType(self) -> _Optional[_ods_ir.Attribute]:
    """Return the `multiplicandAPtxType` attribute, or None when it is not set."""
    attr_dict = self.operation.attributes
    return attr_dict["multiplicandAPtxType"] if "multiplicandAPtxType" in attr_dict else None

  @multiplicandAPtxType.setter
  def multiplicandAPtxType(self, value: _Optional[_ods_ir.Attribute]):
    """Set `multiplicandAPtxType`; assigning None removes the attribute if present."""
    attr_dict = self.operation.attributes
    if value is None:
      if "multiplicandAPtxType" in attr_dict:
        del attr_dict["multiplicandAPtxType"]
      return
    attr_dict["multiplicandAPtxType"] = value

  @multiplicandAPtxType.deleter
  def multiplicandAPtxType(self):
    """Delete the `multiplicandAPtxType` attribute from the operation."""
    attr_dict = self.operation.attributes
    del attr_dict["multiplicandAPtxType"]

  @builtins.property
  def multiplicandBPtxType(self) -> _Optional[_ods_ir.Attribute]:
    """Return the `multiplicandBPtxType` attribute, or None when it is not set."""
    attr_dict = self.operation.attributes
    return attr_dict["multiplicandBPtxType"] if "multiplicandBPtxType" in attr_dict else None

  @multiplicandBPtxType.setter
  def multiplicandBPtxType(self, value: _Optional[_ods_ir.Attribute]):
    """Set `multiplicandBPtxType`; assigning None removes the attribute if present."""
    attr_dict = self.operation.attributes
    if value is None:
      if "multiplicandBPtxType" in attr_dict:
        del attr_dict["multiplicandBPtxType"]
      return
    attr_dict["multiplicandBPtxType"] = value

  @multiplicandBPtxType.deleter
  def multiplicandBPtxType(self):
    """Delete the `multiplicandBPtxType` attribute from the operation."""
    attr_dict = self.operation.attributes
    del attr_dict["multiplicandBPtxType"]

  @builtins.property
  def scaleVecSize(self) -> _ods_ir.Attribute:
    """Return the `scaleVecSize` attribute of the operation."""
    attr_dict = self.operation.attributes
    return attr_dict["scaleVecSize"]

  @scaleVecSize.setter
  def scaleVecSize(self, value: _ods_ir.Attribute):
    """Replace the `scaleVecSize` attribute; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["scaleVecSize"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def blockScaleFormat(self) -> _ods_ir.Attribute:
    """Return the `blockScaleFormat` attribute of the operation."""
    attr_dict = self.operation.attributes
    return attr_dict["blockScaleFormat"]

  @blockScaleFormat.setter
  def blockScaleFormat(self, value: _ods_ir.Attribute):
    """Replace the `blockScaleFormat` attribute; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["blockScaleFormat"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Return the `kind` attribute of the operation."""
    attr_dict = self.operation.attributes
    return attr_dict["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Replace the `kind` attribute; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["kind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the `res` result (result 0)."""
    result_list = self.operation.results
    return result_list[0]

def mma_block_scale(res, shape, scale_vec_size, block_scale_format, kind, operand_a, operand_b, operand_c, scale_a_data, byte_id_a, thread_id_a, scale_b_data, byte_id_b, thread_id_b, *, multiplicand_a_ptx_type=None, multiplicand_b_ptx_type=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `MmaBlockScaleOp` and return its `result`.

  The snake_case parameters are forwarded to the camelCase constructor
  arguments of `MmaBlockScaleOp` (e.g. `scale_vec_size` -> `scaleVecSize`).
  """
  op = MmaBlockScaleOp(
      res=res,
      shape=shape,
      scaleVecSize=scale_vec_size,
      blockScaleFormat=block_scale_format,
      kind=kind,
      operandA=operand_a,
      operandB=operand_b,
      operandC=operand_c,
      scaleAData=scale_a_data,
      byteIdA=byte_id_a,
      threadIdA=thread_id_a,
      scaleBData=scale_b_data,
      byteIdB=byte_id_b,
      threadIdB=thread_id_b,
      multiplicandAPtxType=multiplicand_a_ptx_type,
      multiplicandBPtxType=multiplicand_b_ptx_type,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class MmaOp(_ods_ir.OpView):
  r"""
  The `nvvm.mma.sync` operation collectively performs the operation
  `D = matmul(A, B) + C` using all threads in a warp.

  All the threads in the warp must execute the same `mma.sync` operation.

  For each possible multiplicand PTX data type, there are one or more possible
  instruction shapes given as "mMnNkK". The below table describes the possibilities
  as well as the types required for the operands. Note that the data type for
  C (the accumulator) and D (the result) can vary independently when there are
  multiple possibilities in the "C/D Type" column.

  When an optional attribute cannot be immediately inferred from the types of
  the operands and the result during parsing or validation, an error will be
  raised.

  `b1Op` is only relevant when the binary (b1) type is given to
  `multiplicandDataType`. It specifies how the multiply-and-accumulate is
  performed and is either `xor_popc` or `and_popc`. The default is `xor_popc`.

  `intOverflowBehavior` is only relevant when the `multiplicandType` attribute
  is one of `u8, s8, u4, s4`, this attribute describes how overflow is handled
  in the accumulator. When the attribute is `satfinite`, the accumulator values
  are clamped in the int32 range on overflow. This is the default behavior.
  Alternatively, accumulator behavior `wrapped` can also be specified, in
  which case overflow wraps from one end of the range to the other.

  `layoutA` and `layoutB` are required and should generally be set to
  `#nvvm.mma_layout<row>` and `#nvvm.mma_layout<col>` respectively, but other
  combinations are possible for certain layouts according to the table below.

  ```
  | A/B Type | Shape     | ALayout | BLayout | A Type   | B Type   | C/D Type          |
  |----------|-----------|---------|---------|----------|----------|-------------------|
  | f64      | .m8n8k4   | row     | col     | 1x f64   | 1x f64   | 2x f64            |
  | f16      | .m8n8k4   | row/col | row/col | 2x f16x2 | 2x f16x2 | 4x f16x2 or 8xf32 |
  |          | .m16n8k8  | row     | col     | 2x f16x2 | 1x f16x2 | 2x f16x2 or 4 f32 |
  |          | .m16n8k16 | row     | col     | 4x f16x2 | 2x f16x2 | 2x f16x2 or 4 f32 |
  | bf16     | .m16n8k8  | row     | col     | 2x i32   | 1x i32   | 4x f32            |
  |          | .m16n8k16 | row     | col     | 4x i32   | 2x i32   | 4x f32            |
  | tf32     | .m16n8k4  | row     | col     | 2x i32   | 1x i32   | 4x f32            |
  |          | .m16n8k8  | row     | col     | 4x i32   | 2x i32   | 2x f16x2 or 4 f32 |
  | u8/s8    | .m8n8k16  | row     | col     | 1x i32   | 1x i32   | 2x i32            |
  |          | .m16n8k16 | row     | col     | 2x i32   | 1x i32   | 4x i32            |
  |          | .m16n8k32 | row     | col     | 4x i32   | 2x i32   | 4x i32            |
  | u4/s4    | .m8n8k32  | row     | col     | 1x i32   | 1x i32   | 2x i32            |
  |          | m16n8k32  | row     | col     | 2x i32   | 1x i32   | 4x i32            |
  |          | m16n8k64  | row     | col     | 4x i32   | 2x i32   | 4x i32            |
  | b1       | m8n8k128  | row     | col     | 1x i32   | 1x i32   | 2x i32            |
  |          | m16n8k128 | row     | col     | 2x i32   | 1x i32   | 4x i32            |
  ```


  Example:
  ```mlir

  %128 = nvvm.mma.sync A[%120, %121, %122, %123]
                       B[%124, %125]
                       C[%126, %127]
                       {layoutA = #nvvm.mma_layout<row>,
                        layoutB = #nvvm.mma_layout<col>,
                        shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}}
      : (vector<2xf16>, vector<2xf16>, vector<2xf16>)
         -> !llvm.struct<(vector<2xf16>, vector<2xf16>)>
  ```
  """

  OPERATION_NAME = "nvvm.mma.sync"

  _ODS_OPERAND_SEGMENTS = [-1,-1,-1,]

  _ODS_REGIONS = (0, True)

  def __init__(self, res, shape, layoutA, layoutB, operandA, operandB, operandC, *, b1Op=None, intOverflowBehavior=None, multiplicandAPtxType=None, multiplicandBPtxType=None, loc=None, ip=None):
    """Build an `nvvm.mma.sync` operation.

    Args:
      res: Sole entry of the `results` list handed to the base constructor
        (presumably the result type -- confirm against `OpView`).
      shape, layoutA, layoutB: Values for the attributes of the same names;
        always set.
      operandA, operandB, operandC: Operand groups 0-2; each is normalized
        with `_get_op_results_or_values` before being stored.
      b1Op, intOverflowBehavior, multiplicandAPtxType, multiplicandBPtxType:
        Optional attributes; only set when not None.
      loc: Location forwarded to the base constructor and used to resolve
        the context for attribute building.
      ip: Insertion point forwarded to the base constructor.
    """
    operand_groups = [
        _get_op_results_or_values(operandA),
        _get_op_results_or_values(operandB),
        _get_op_results_or_values(operandC),
    ]
    _ods_context = _ods_get_default_loc_context(loc)

    def _coerce(raw, builder_name):
      # Ready-made attributes pass through untouched; so do raw values when
      # no builder is registered under `builder_name`.
      if isinstance(raw, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains(builder_name):
        return raw
      return _ods_ir.AttrBuilder.get(builder_name)(raw, context=_ods_context)

    attr_map = {}
    attr_map["shape"] = _coerce(shape, 'NVVM_MMAShapeAttr')
    if b1Op is not None:
      attr_map["b1Op"] = _coerce(b1Op, 'MMAB1OpAttr')
    if intOverflowBehavior is not None:
      attr_map["intOverflowBehavior"] = _coerce(intOverflowBehavior, 'MMAIntOverflowAttr')
    attr_map["layoutA"] = _coerce(layoutA, 'MMALayoutAttr')
    attr_map["layoutB"] = _coerce(layoutB, 'MMALayoutAttr')
    if multiplicandAPtxType is not None:
      attr_map["multiplicandAPtxType"] = _coerce(multiplicandAPtxType, 'MMATypesAttr')
    if multiplicandBPtxType is not None:
      attr_map["multiplicandBPtxType"] = _coerce(multiplicandBPtxType, 'MMATypesAttr')
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attr_map,
        results=[res],
        operands=operand_groups,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def operandA(self) -> _ods_ir.OpOperandList:
    """Return operand segment 0 (`operandA`) as selected by `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 0)

  @builtins.property
  def operandB(self) -> _ods_ir.OpOperandList:
    """Return operand segment 1 (`operandB`) as selected by `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 1)

  @builtins.property
  def operandC(self) -> _ods_ir.OpOperandList:
    """Return operand segment 2 (`operandC`) as selected by `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(op.operands, op.attributes["operandSegmentSizes"], 2)

  @builtins.property
  def shape(self) -> _ods_ir.Attribute:
    """Return the `shape` attribute of the operation."""
    attr_dict = self.operation.attributes
    return attr_dict["shape"]

  @shape.setter
  def shape(self, value: _ods_ir.Attribute):
    """Replace the `shape` attribute; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["shape"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def b1Op(self) -> _Optional[_ods_ir.Attribute]:
    """Return the `b1Op` attribute, or None when it is not set."""
    attr_dict = self.operation.attributes
    return attr_dict["b1Op"] if "b1Op" in attr_dict else None

  @b1Op.setter
  def b1Op(self, value: _Optional[_ods_ir.Attribute]):
    """Set `b1Op`; assigning None removes the attribute if present."""
    attr_dict = self.operation.attributes
    if value is None:
      if "b1Op" in attr_dict:
        del attr_dict["b1Op"]
      return
    attr_dict["b1Op"] = value

  @b1Op.deleter
  def b1Op(self):
    """Delete the `b1Op` attribute from the operation."""
    attr_dict = self.operation.attributes
    del attr_dict["b1Op"]

  @builtins.property
  def intOverflowBehavior(self) -> _Optional[_ods_ir.Attribute]:
    """Return the `intOverflowBehavior` attribute, or None when it is not set."""
    attr_dict = self.operation.attributes
    return attr_dict["intOverflowBehavior"] if "intOverflowBehavior" in attr_dict else None

  @intOverflowBehavior.setter
  def intOverflowBehavior(self, value: _Optional[_ods_ir.Attribute]):
    """Set `intOverflowBehavior`; assigning None removes the attribute if present."""
    attr_dict = self.operation.attributes
    if value is None:
      if "intOverflowBehavior" in attr_dict:
        del attr_dict["intOverflowBehavior"]
      return
    attr_dict["intOverflowBehavior"] = value

  @intOverflowBehavior.deleter
  def intOverflowBehavior(self):
    """Delete the `intOverflowBehavior` attribute from the operation."""
    attr_dict = self.operation.attributes
    del attr_dict["intOverflowBehavior"]

  @builtins.property
  def layoutA(self) -> _ods_ir.Attribute:
    """Return the `layoutA` attribute of the operation."""
    attr_dict = self.operation.attributes
    return attr_dict["layoutA"]

  @layoutA.setter
  def layoutA(self, value: _ods_ir.Attribute):
    """Replace the `layoutA` attribute; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["layoutA"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def layoutB(self) -> _ods_ir.Attribute:
    """Return the `layoutB` attribute of the operation."""
    attr_dict = self.operation.attributes
    return attr_dict["layoutB"]

  @layoutB.setter
  def layoutB(self, value: _ods_ir.Attribute):
    """Replace the `layoutB` attribute; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["layoutB"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def multiplicandAPtxType(self) -> _Optional[_ods_ir.Attribute]:
    """Return the `multiplicandAPtxType` attribute, or None when it is not set."""
    attr_dict = self.operation.attributes
    return attr_dict["multiplicandAPtxType"] if "multiplicandAPtxType" in attr_dict else None

  @multiplicandAPtxType.setter
  def multiplicandAPtxType(self, value: _Optional[_ods_ir.Attribute]):
    """Set `multiplicandAPtxType`; assigning None removes the attribute if present."""
    attr_dict = self.operation.attributes
    if value is None:
      if "multiplicandAPtxType" in attr_dict:
        del attr_dict["multiplicandAPtxType"]
      return
    attr_dict["multiplicandAPtxType"] = value

  @multiplicandAPtxType.deleter
  def multiplicandAPtxType(self):
    """Delete the `multiplicandAPtxType` attribute from the operation."""
    attr_dict = self.operation.attributes
    del attr_dict["multiplicandAPtxType"]

  @builtins.property
  def multiplicandBPtxType(self) -> _Optional[_ods_ir.Attribute]:
    """Return the `multiplicandBPtxType` attribute, or None when it is not set."""
    attr_dict = self.operation.attributes
    return attr_dict["multiplicandBPtxType"] if "multiplicandBPtxType" in attr_dict else None

  @multiplicandBPtxType.setter
  def multiplicandBPtxType(self, value: _Optional[_ods_ir.Attribute]):
    """Set `multiplicandBPtxType`; assigning None removes the attribute if present."""
    attr_dict = self.operation.attributes
    if value is None:
      if "multiplicandBPtxType" in attr_dict:
        del attr_dict["multiplicandBPtxType"]
      return
    attr_dict["multiplicandBPtxType"] = value

  @multiplicandBPtxType.deleter
  def multiplicandBPtxType(self):
    """Delete the `multiplicandBPtxType` attribute from the operation."""
    attr_dict = self.operation.attributes
    del attr_dict["multiplicandBPtxType"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the `res` result (result 0)."""
    result_list = self.operation.results
    return result_list[0]

def mma_sync(res, shape, layout_a, layout_b, operand_a, operand_b, operand_c, *, b1_op=None, int_overflow_behavior=None, multiplicand_a_ptx_type=None, multiplicand_b_ptx_type=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `MmaOp` from snake_case arguments and return its `.result`.

  Each argument is forwarded unchanged to the camelCase keyword of the same
  meaning on `MmaOp.__init__`.
  """
  op = MmaOp(
    res=res,
    shape=shape,
    layoutA=layout_a,
    layoutB=layout_b,
    operandA=operand_a,
    operandB=operand_b,
    operandC=operand_c,
    b1Op=b1_op,
    intOverflowBehavior=int_overflow_behavior,
    multiplicandAPtxType=multiplicand_a_ptx_type,
    multiplicandBPtxType=multiplicand_b_ptx_type,
    loc=loc,
    ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class MmaSpBlockScaleOp(_ods_ir.OpView):
  r"""
  The `nvvm.mma.sp.block_scale` operation collectively performs the operation
  `D = matmul(A_sparse * SF_A, B * SF_B) + C` using all threads in a warp.

  A is a sparse matrix, and B, C and D are dense matrices.
  SF_A and SF_B are scaling factors.
  Dimensions of SF_A and SF_B are based on scale vector sizes (x1, x2, x4),
  and the data type must be either ue8m0 or ue4m3.

  This operation is similar to `nvvm.mma.block_scale` but with structured sparsity
  in the A operand. The sparsity follows the 2:4 structured sparse pattern
  where 2 out of every 4 elements are non-zero.

  All the threads in the warp must execute the same `mma.sp.block_scale` operation.

  The `sparseMetadata` operand provides the sparsity indices that indicate
  which elements in the A operand are non-zero. The `sparsitySelector`
  controls how the indices are distributed among threads in the warp and
  should typically be 0 or 1.

  This operation follows the same design pattern as `nvvm.mma.sp.sync`, with additional
  scaling operands for both A and B matrices. Note that sparse block scale operations
  always use ordered metadata (sm_90+).

  Example:
  ```mlir
  %d = nvvm.mma.sp.block_scale A[%a0, %a1] B[%b0, %b1] C[%c0, %c1]
                               sparseMetadata[%meta] selector[%sel]
                               scaleA[%scaleAData, %byteIdA, %threadIdA]
                               scaleB[%scaleBData, %byteIdB, %threadIdB]
                               {shape = #nvvm.shape<m = 16, n = 8, k = 128>,
                                multiplicandAPtxType = #nvvm.mma_type<e2m1>,
                                multiplicandBPtxType = #nvvm.mma_type<e2m1>,
                                scaleVecSize = #nvvm.scale_vec_size<x2>,
                                blockScaleFormat = #nvvm.block_scale_format<ue8m0>,
                                kind = #nvvm.block_scale_kind<mxf4>}
      : (vector<2xf16>, vector<2xf16>, vector<2xf32>) -> !llvm.struct<(f32, f32)>
  ```
  """

  OPERATION_NAME = "nvvm.mma.sp.block_scale"

  # One entry per operand group, in the order `__init__` appends them:
  # operandA, operandB, operandC, then eight groups of size 1.
  _ODS_OPERAND_SEGMENTS = [-1,-1,-1,1,1,1,1,1,1,1,1,]

  _ODS_REGIONS = (0, True)

  def __init__(self, res, shape, scaleVecSize, blockScaleFormat, kind, operandA, operandB, operandC, sparseMetadata, sparsitySelector, scaleAData, byteIdA, threadIdA, scaleBData, byteIdB, threadIdB, *, multiplicandAPtxType=None, multiplicandBPtxType=None, orderedMetadata=None, loc=None, ip=None):
    """Create the op with result type `res`.

    `shape`, `scaleVecSize`, `blockScaleFormat` and `kind` are always set as
    attributes; `multiplicandAPtxType` / `multiplicandBPtxType` only when not
    None; `orderedMetadata` becomes a unit attribute when truthy. Attribute
    values that are not already `Attribute` instances are converted through
    the registered `AttrBuilder`, if one exists for the attribute kind.
    """
    # The list order must stay aligned with `_ODS_OPERAND_SEGMENTS`.
    operands = [
      _get_op_results_or_values(operandA),
      _get_op_results_or_values(operandB),
      _get_op_results_or_values(operandC),
      sparseMetadata,
      sparsitySelector,
      scaleAData,
      byteIdA,
      threadIdA,
      scaleBData,
      byteIdB,
      threadIdB,
    ]
    _ods_context = _ods_get_default_loc_context(loc)

    def _as_attr(builder_name, value):
      # Ready-made attributes, and values with no registered builder, pass through as-is.
      if isinstance(value, _ods_ir.Attribute):
        return value
      if not _ods_ir.AttrBuilder.contains(builder_name):
        return value
      return _ods_ir.AttrBuilder.get(builder_name)(value, context=_ods_context)

    attributes = {}
    attributes["shape"] = _as_attr('NVVM_MMAShapeAttr', shape)
    if multiplicandAPtxType is not None:
      attributes["multiplicandAPtxType"] = _as_attr('MMATypesAttr', multiplicandAPtxType)
    if multiplicandBPtxType is not None:
      attributes["multiplicandBPtxType"] = _as_attr('MMATypesAttr', multiplicandBPtxType)
    attributes["scaleVecSize"] = _as_attr('ScaleVecSizeAttr', scaleVecSize)
    attributes["blockScaleFormat"] = _as_attr('BlockScaleFormatAttr', blockScaleFormat)
    attributes["kind"] = _as_attr('MMABlockScaleKindAttr', kind)
    if orderedMetadata:
      attributes["orderedMetadata"] = _ods_ir.UnitAttr.get(
        _ods_get_default_loc_context(loc))

    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[res],
      operands=operands,
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def operandA(self) -> _ods_ir.OpOperandList:
    """Operands belonging to segment 0 of `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 0)

  @builtins.property
  def operandB(self) -> _ods_ir.OpOperandList:
    """Operands belonging to segment 1 of `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 1)

  @builtins.property
  def operandC(self) -> _ods_ir.OpOperandList:
    """Operands belonging to segment 2 of `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 2)

  @builtins.property
  def sparseMetadata(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First operand of segment 3."""
    op = self.operation
    segment = _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 3)
    return segment[0]

  @builtins.property
  def sparsitySelector(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First operand of segment 4."""
    op = self.operation
    segment = _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 4)
    return segment[0]

  @builtins.property
  def scaleAData(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First operand of segment 5."""
    op = self.operation
    segment = _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 5)
    return segment[0]

  @builtins.property
  def byteIdA(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First operand of segment 6."""
    op = self.operation
    segment = _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 6)
    return segment[0]

  @builtins.property
  def threadIdA(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First operand of segment 7."""
    op = self.operation
    segment = _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 7)
    return segment[0]

  @builtins.property
  def scaleBData(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First operand of segment 8."""
    op = self.operation
    segment = _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 8)
    return segment[0]

  @builtins.property
  def byteIdB(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First operand of segment 9."""
    op = self.operation
    segment = _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 9)
    return segment[0]

  @builtins.property
  def threadIdB(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First operand of segment 10."""
    op = self.operation
    segment = _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 10)
    return segment[0]

  @builtins.property
  def shape(self) -> _ods_ir.Attribute:
    """The mandatory `shape` attribute."""
    attrs = self.operation.attributes
    return attrs["shape"]

  @shape.setter
  def shape(self, value: _ods_ir.Attribute):
    # Mandatory attribute: it may be replaced but never cleared.
    if value is not None:
      self.operation.attributes["shape"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def multiplicandAPtxType(self) -> _Optional[_ods_ir.Attribute]:
    """The optional `multiplicandAPtxType` attribute, or None when unset."""
    attrs = self.operation.attributes
    return attrs["multiplicandAPtxType"] if "multiplicandAPtxType" in attrs else None

  @multiplicandAPtxType.setter
  def multiplicandAPtxType(self, value: _Optional[_ods_ir.Attribute]):
    attrs = self.operation.attributes
    if value is None:
      # Assigning None clears the attribute; already-absent is a no-op.
      if "multiplicandAPtxType" in attrs:
        del attrs["multiplicandAPtxType"]
      return
    attrs["multiplicandAPtxType"] = value

  @multiplicandAPtxType.deleter
  def multiplicandAPtxType(self):
    attrs = self.operation.attributes
    del attrs["multiplicandAPtxType"]

  @builtins.property
  def multiplicandBPtxType(self) -> _Optional[_ods_ir.Attribute]:
    """The optional `multiplicandBPtxType` attribute, or None when unset."""
    attrs = self.operation.attributes
    return attrs["multiplicandBPtxType"] if "multiplicandBPtxType" in attrs else None

  @multiplicandBPtxType.setter
  def multiplicandBPtxType(self, value: _Optional[_ods_ir.Attribute]):
    attrs = self.operation.attributes
    if value is None:
      # Assigning None clears the attribute; already-absent is a no-op.
      if "multiplicandBPtxType" in attrs:
        del attrs["multiplicandBPtxType"]
      return
    attrs["multiplicandBPtxType"] = value

  @multiplicandBPtxType.deleter
  def multiplicandBPtxType(self):
    attrs = self.operation.attributes
    del attrs["multiplicandBPtxType"]

  @builtins.property
  def scaleVecSize(self) -> _ods_ir.Attribute:
    """The mandatory `scaleVecSize` attribute."""
    attrs = self.operation.attributes
    return attrs["scaleVecSize"]

  @scaleVecSize.setter
  def scaleVecSize(self, value: _ods_ir.Attribute):
    # Mandatory attribute: it may be replaced but never cleared.
    if value is not None:
      self.operation.attributes["scaleVecSize"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def blockScaleFormat(self) -> _ods_ir.Attribute:
    """The mandatory `blockScaleFormat` attribute."""
    attrs = self.operation.attributes
    return attrs["blockScaleFormat"]

  @blockScaleFormat.setter
  def blockScaleFormat(self, value: _ods_ir.Attribute):
    # Mandatory attribute: it may be replaced but never cleared.
    if value is not None:
      self.operation.attributes["blockScaleFormat"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """The mandatory `kind` attribute."""
    attrs = self.operation.attributes
    return attrs["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    # Mandatory attribute: it may be replaced but never cleared.
    if value is not None:
      self.operation.attributes["kind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def orderedMetadata(self) -> bool:
    """True when the `orderedMetadata` unit attribute is present."""
    attrs = self.operation.attributes
    return "orderedMetadata" in attrs

  @orderedMetadata.setter
  def orderedMetadata(self, value):
    # Truthy values attach the unit attribute; falsy values remove it if present.
    if not value:
      attrs = self.operation.attributes
      if "orderedMetadata" in attrs:
        del attrs["orderedMetadata"]
      return
    self.operation.attributes["orderedMetadata"] = _ods_ir.UnitAttr.get()

  @orderedMetadata.deleter
  def orderedMetadata(self):
    attrs = self.operation.attributes
    del attrs["orderedMetadata"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The first (index 0) result of the operation."""
    op_results = self.operation.results
    return op_results[0]

def mma_sp_block_scale(res, shape, scale_vec_size, block_scale_format, kind, operand_a, operand_b, operand_c, sparse_metadata, sparsity_selector, scale_a_data, byte_id_a, thread_id_a, scale_b_data, byte_id_b, thread_id_b, *, multiplicand_a_ptx_type=None, multiplicand_b_ptx_type=None, ordered_metadata=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `MmaSpBlockScaleOp` from snake_case arguments and return its `.result`.

  Each argument is forwarded unchanged to the matching camelCase keyword of
  `MmaSpBlockScaleOp.__init__`.
  """
  op = MmaSpBlockScaleOp(
    res=res,
    shape=shape,
    scaleVecSize=scale_vec_size,
    blockScaleFormat=block_scale_format,
    kind=kind,
    operandA=operand_a,
    operandB=operand_b,
    operandC=operand_c,
    sparseMetadata=sparse_metadata,
    sparsitySelector=sparsity_selector,
    scaleAData=scale_a_data,
    byteIdA=byte_id_a,
    threadIdA=thread_id_a,
    scaleBData=scale_b_data,
    byteIdB=byte_id_b,
    threadIdB=thread_id_b,
    multiplicandAPtxType=multiplicand_a_ptx_type,
    multiplicandBPtxType=multiplicand_b_ptx_type,
    orderedMetadata=ordered_metadata,
    loc=loc,
    ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class MmaSpOp(_ods_ir.OpView):
  r"""
  The `nvvm.mma.sp.sync` operation collectively performs the sparse operation
  `D = matmul(A_sparse, B) + C` using all threads in a warp.

  This operation is similar to `nvvm.mma.sync` but with structured sparsity
  in the A operand. The sparsity follows the 2:4 structured sparse pattern
  where 2 out of every 4 elements are non-zero.

  All the threads in the warp must execute the same `mma.sp.sync` operation.

  The `sparseMetadata` operand provides the sparsity indices that indicate
  which elements in the A operand are non-zero. The `sparsitySelector`
  controls how the indices are distributed among threads in the warp and
  should typically be 0 or 1.

  The optional `orderedMetadata` attribute specifies the metadata ordering:
  - Absence (default): Uses standard sparse metadata ordering
  - Presence: Uses ordered metadata (PTX ISA 8.5+, sm_90+)

  The optional `kind` attribute specifies mixed-precision modes for FP8 operations:
  - `f8f6f4`: Enables e3m2, e2m3, e2m1 FP8 types and f16 accumulator (PTX ISA 8.7+, sm_90+)
  - Only valid with ordered metadata and m16n8k64 shape

  The shapes, layouts, and data types follow the same constraints as the
  regular `nvvm.mma.sync` operation, but the A operand contains only the
  non-zero elements in compressed format.

  Example:
  ```mlir
  %d = nvvm.mma.sp.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1]
                        sparseMetadata[%meta] selector[%sel]
                        {shape = {k = 32 : i32, m = 16 : i32, n = 8 : i32}}
      : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)>

  // With ordered metadata:
  %d = nvvm.mma.sp.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1]
                        sparseMetadata[%meta] selector[%sel]
                        {orderedMetadata, shape = {k = 32 : i32, m = 16 : i32, n = 8 : i32}}
      : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)>
  ```
  """

  OPERATION_NAME = "nvvm.mma.sp.sync"

  # One entry per operand group, in the order `__init__` appends them:
  # operandA, operandB, operandC, sparseMetadata, sparsitySelector.
  _ODS_OPERAND_SEGMENTS = [-1,-1,-1,1,1,]

  _ODS_REGIONS = (0, True)

  def __init__(self, res, shape, operandA, operandB, operandC, sparseMetadata, sparsitySelector, *, intOverflowBehavior=None, multiplicandAPtxType=None, multiplicandBPtxType=None, orderedMetadata=None, kind=None, loc=None, ip=None):
    """Create the op with result type `res`.

    `shape` is always set as an attribute; `intOverflowBehavior`,
    `multiplicandAPtxType`, `multiplicandBPtxType` and `kind` only when not
    None; `orderedMetadata` becomes a unit attribute when truthy. Attribute
    values that are not already `Attribute` instances are converted through
    the registered `AttrBuilder`, if one exists for the attribute kind.
    """
    # The list order must stay aligned with `_ODS_OPERAND_SEGMENTS`.
    operands = [
      _get_op_results_or_values(operandA),
      _get_op_results_or_values(operandB),
      _get_op_results_or_values(operandC),
      sparseMetadata,
      sparsitySelector,
    ]
    _ods_context = _ods_get_default_loc_context(loc)

    def _as_attr(builder_name, value):
      # Ready-made attributes, and values with no registered builder, pass through as-is.
      if isinstance(value, _ods_ir.Attribute):
        return value
      if not _ods_ir.AttrBuilder.contains(builder_name):
        return value
      return _ods_ir.AttrBuilder.get(builder_name)(value, context=_ods_context)

    attributes = {}
    attributes["shape"] = _as_attr('NVVM_MMAShapeAttr', shape)
    if intOverflowBehavior is not None:
      attributes["intOverflowBehavior"] = _as_attr('MMAIntOverflowAttr', intOverflowBehavior)
    if multiplicandAPtxType is not None:
      attributes["multiplicandAPtxType"] = _as_attr('MMATypesAttr', multiplicandAPtxType)
    if multiplicandBPtxType is not None:
      attributes["multiplicandBPtxType"] = _as_attr('MMATypesAttr', multiplicandBPtxType)
    if orderedMetadata:
      attributes["orderedMetadata"] = _ods_ir.UnitAttr.get(
        _ods_get_default_loc_context(loc))
    if kind is not None:
      attributes["kind"] = _as_attr('MMAKindAttr', kind)

    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[res],
      operands=operands,
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def operandA(self) -> _ods_ir.OpOperandList:
    """Operands belonging to segment 0 of `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 0)

  @builtins.property
  def operandB(self) -> _ods_ir.OpOperandList:
    """Operands belonging to segment 1 of `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 1)

  @builtins.property
  def operandC(self) -> _ods_ir.OpOperandList:
    """Operands belonging to segment 2 of `operandSegmentSizes`."""
    op = self.operation
    return _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 2)

  @builtins.property
  def sparseMetadata(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First operand of segment 3."""
    op = self.operation
    segment = _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 3)
    return segment[0]

  @builtins.property
  def sparsitySelector(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First operand of segment 4."""
    op = self.operation
    segment = _ods_segmented_accessor(
      op.operands, op.attributes["operandSegmentSizes"], 4)
    return segment[0]

  @builtins.property
  def shape(self) -> _ods_ir.Attribute:
    """The mandatory `shape` attribute."""
    attrs = self.operation.attributes
    return attrs["shape"]

  @shape.setter
  def shape(self, value: _ods_ir.Attribute):
    # Mandatory attribute: it may be replaced but never cleared.
    if value is not None:
      self.operation.attributes["shape"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def intOverflowBehavior(self) -> _Optional[_ods_ir.Attribute]:
    """The optional `intOverflowBehavior` attribute, or None when unset."""
    attrs = self.operation.attributes
    return attrs["intOverflowBehavior"] if "intOverflowBehavior" in attrs else None

  @intOverflowBehavior.setter
  def intOverflowBehavior(self, value: _Optional[_ods_ir.Attribute]):
    attrs = self.operation.attributes
    if value is None:
      # Assigning None clears the attribute; already-absent is a no-op.
      if "intOverflowBehavior" in attrs:
        del attrs["intOverflowBehavior"]
      return
    attrs["intOverflowBehavior"] = value

  @intOverflowBehavior.deleter
  def intOverflowBehavior(self):
    attrs = self.operation.attributes
    del attrs["intOverflowBehavior"]

  @builtins.property
  def multiplicandAPtxType(self) -> _Optional[_ods_ir.Attribute]:
    """The optional `multiplicandAPtxType` attribute, or None when unset."""
    attrs = self.operation.attributes
    return attrs["multiplicandAPtxType"] if "multiplicandAPtxType" in attrs else None

  @multiplicandAPtxType.setter
  def multiplicandAPtxType(self, value: _Optional[_ods_ir.Attribute]):
    attrs = self.operation.attributes
    if value is None:
      # Assigning None clears the attribute; already-absent is a no-op.
      if "multiplicandAPtxType" in attrs:
        del attrs["multiplicandAPtxType"]
      return
    attrs["multiplicandAPtxType"] = value

  @multiplicandAPtxType.deleter
  def multiplicandAPtxType(self):
    attrs = self.operation.attributes
    del attrs["multiplicandAPtxType"]

  @builtins.property
  def multiplicandBPtxType(self) -> _Optional[_ods_ir.Attribute]:
    """The optional `multiplicandBPtxType` attribute, or None when unset."""
    attrs = self.operation.attributes
    return attrs["multiplicandBPtxType"] if "multiplicandBPtxType" in attrs else None

  @multiplicandBPtxType.setter
  def multiplicandBPtxType(self, value: _Optional[_ods_ir.Attribute]):
    attrs = self.operation.attributes
    if value is None:
      # Assigning None clears the attribute; already-absent is a no-op.
      if "multiplicandBPtxType" in attrs:
        del attrs["multiplicandBPtxType"]
      return
    attrs["multiplicandBPtxType"] = value

  @multiplicandBPtxType.deleter
  def multiplicandBPtxType(self):
    attrs = self.operation.attributes
    del attrs["multiplicandBPtxType"]

  @builtins.property
  def orderedMetadata(self) -> bool:
    """True when the `orderedMetadata` unit attribute is present."""
    attrs = self.operation.attributes
    return "orderedMetadata" in attrs

  @orderedMetadata.setter
  def orderedMetadata(self, value):
    # Truthy values attach the unit attribute; falsy values remove it if present.
    if not value:
      attrs = self.operation.attributes
      if "orderedMetadata" in attrs:
        del attrs["orderedMetadata"]
      return
    self.operation.attributes["orderedMetadata"] = _ods_ir.UnitAttr.get()

  @orderedMetadata.deleter
  def orderedMetadata(self):
    attrs = self.operation.attributes
    del attrs["orderedMetadata"]

  @builtins.property
  def kind(self) -> _Optional[_ods_ir.Attribute]:
    """The optional `kind` attribute, or None when unset."""
    attrs = self.operation.attributes
    return attrs["kind"] if "kind" in attrs else None

  @kind.setter
  def kind(self, value: _Optional[_ods_ir.Attribute]):
    attrs = self.operation.attributes
    if value is None:
      # Assigning None clears the attribute; already-absent is a no-op.
      if "kind" in attrs:
        del attrs["kind"]
      return
    attrs["kind"] = value

  @kind.deleter
  def kind(self):
    attrs = self.operation.attributes
    del attrs["kind"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The first (index 0) result of the operation."""
    op_results = self.operation.results
    return op_results[0]

def mma_sp_sync(res, shape, operand_a, operand_b, operand_c, sparse_metadata, sparsity_selector, *, int_overflow_behavior=None, multiplicand_a_ptx_type=None, multiplicand_b_ptx_type=None, ordered_metadata=None, kind=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `MmaSpOp` from snake_case arguments and return its `.result`.

  Each argument is forwarded unchanged to the matching camelCase keyword of
  `MmaSpOp.__init__`.
  """
  op = MmaSpOp(
    res=res,
    shape=shape,
    operandA=operand_a,
    operandB=operand_b,
    operandC=operand_c,
    sparseMetadata=sparse_metadata,
    sparsitySelector=sparsity_selector,
    intOverflowBehavior=int_overflow_behavior,
    multiplicandAPtxType=multiplicand_a_ptx_type,
    multiplicandBPtxType=multiplicand_b_ptx_type,
    orderedMetadata=ordered_metadata,
    kind=kind,
    loc=loc,
    ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class NanosleepOp(_ods_ir.OpView):
  r"""
  The op suspends the thread for a sleep duration approximately close to the
  delay `$duration`, specified in nanoseconds.

  The sleep duration is approximated, but guaranteed to be in the
  interval [0, 2*t], where `t` is the requested delay. The maximum sleep
  duration is 1 millisecond.
  The implementation may reduce the sleep duration for individual threads
  within a warp such that all sleeping threads in the warp wake up together.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-nanosleep)
  """

  OPERATION_NAME = "nvvm.nanosleep"

  _ODS_REGIONS = (0, True)

  def __init__(self, duration, *, loc=None, ip=None):
    """Create the op with `duration` as its only operand; it has no results or attributes."""
    # The context lookup is kept although its value is not consumed here.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes={},
      results=[],
      operands=[duration],
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def duration(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """The first (index 0) operand of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

def nanosleep(duration, *, loc=None, ip=None) -> NanosleepOp:
  """Construct and return a `NanosleepOp` for `duration`."""
  op = NanosleepOp(duration=duration, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class PMEventOp(_ods_ir.OpView):
  r"""
  Triggers one or more of a fixed number of performance monitor events, with
  event index or mask specified by immediate operand.

  Without `mask` it triggers a single performance monitor event indexed by
  immediate operand a, in the range 0..15.

  With `mask` it triggers one or more of the performance monitor events. Each
  bit in the 16-bit immediate operand controls an event.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-pmevent)
  """

  OPERATION_NAME = "nvvm.pmevent"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, maskedEventId=None, eventId=None, loc=None, ip=None):
    """Create the op with no operands or results.

    `maskedEventId` and `eventId` are each set as an attribute only when not
    None. Values that are not already `Attribute` instances are converted via
    the `I16Attr` / `I32Attr` builders respectively, when those are registered.
    """
    _ods_context = _ods_get_default_loc_context(loc)

    def _as_attr(builder_name, value):
      # Ready-made attributes, and values with no registered builder, pass through as-is.
      if isinstance(value, _ods_ir.Attribute):
        return value
      if not _ods_ir.AttrBuilder.contains(builder_name):
        return value
      return _ods_ir.AttrBuilder.get(builder_name)(value, context=_ods_context)

    attributes = {}
    if maskedEventId is not None:
      attributes["maskedEventId"] = _as_attr('I16Attr', maskedEventId)
    if eventId is not None:
      attributes["eventId"] = _as_attr('I32Attr', eventId)

    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[],
      operands=[],
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def maskedEventId(self) -> _Optional[_ods_ir.IntegerAttr]:
    """The optional `maskedEventId` attribute, or None when unset."""
    attrs = self.operation.attributes
    return attrs["maskedEventId"] if "maskedEventId" in attrs else None

  @maskedEventId.setter
  def maskedEventId(self, value: _Optional[_ods_ir.IntegerAttr]):
    attrs = self.operation.attributes
    if value is None:
      # Assigning None clears the attribute; already-absent is a no-op.
      if "maskedEventId" in attrs:
        del attrs["maskedEventId"]
      return
    attrs["maskedEventId"] = value

  @maskedEventId.deleter
  def maskedEventId(self):
    attrs = self.operation.attributes
    del attrs["maskedEventId"]

  @builtins.property
  def eventId(self) -> _Optional[_ods_ir.IntegerAttr]:
    """The optional `eventId` attribute, or None when unset."""
    attrs = self.operation.attributes
    return attrs["eventId"] if "eventId" in attrs else None

  @eventId.setter
  def eventId(self, value: _Optional[_ods_ir.IntegerAttr]):
    attrs = self.operation.attributes
    if value is None:
      # Assigning None clears the attribute; already-absent is a no-op.
      if "eventId" in attrs:
        del attrs["eventId"]
      return
    attrs["eventId"] = value

  @eventId.deleter
  def eventId(self):
    attrs = self.operation.attributes
    del attrs["eventId"]

def pmevent(*, masked_event_id=None, event_id=None, loc=None, ip=None) -> PMEventOp:
  """Construct and return a `PMEventOp`, forwarding the snake_case keywords."""
  op = PMEventOp(
    maskedEventId=masked_event_id,
    eventId=event_id,
    loc=loc,
    ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class PermuteOp(_ods_ir.OpView):
  r"""
  The `nvvm.prmt` operation constructs a permutation of the
  bytes of the first one or two operands, selecting based on
  the 2 least significant bits of the final operand.

  The bytes in the first one or two source operands are numbered.
  The first source operand (%lo) is numbered {b3, b2, b1, b0},
  in the case of the '``default``', '``f4e``' and '``b4e``' variants,
  the second source operand (%hi) is numbered {b7, b6, b5, b4}.

  Modes:
  - `default`: Index mode         - each nibble in `selector` selects a byte from the 8-byte pool
  - `f4e`    : Forward 4 extract  - extracts 4 contiguous bytes starting from position in `selector`
  - `b4e`    : Backward 4 extract - extracts 4 contiguous bytes in reverse order
  - `rc8`    : Replicate 8        - replicates the lower 8 bits across the 32-bit result
  - `ecl`    : Edge clamp left    - clamps out-of-range indices to the leftmost valid byte
  - `ecr`    : Edge clamp right   - clamps out-of-range indices to the rightmost valid byte
  - `rc16`   : Replicate 16       - replicates the lower 16 bits across the 32-bit result

  Depending on the 2 least significant bits of the %selector operand, the result
  of the permutation is defined as follows:

  +------------+----------------+--------------+
  |    Mode    | %selector[1:0] |    Output    |
  +------------+----------------+--------------+
  | '``f4e``'  | 0              | {3, 2, 1, 0} |
  |            +----------------+--------------+
  |            | 1              | {4, 3, 2, 1} |
  |            +----------------+--------------+
  |            | 2              | {5, 4, 3, 2} |
  |            +----------------+--------------+
  |            | 3              | {6, 5, 4, 3} |
  +------------+----------------+--------------+
  | '``b4e``'  | 0              | {5, 6, 7, 0} |
  |            +----------------+--------------+
  |            | 1              | {6, 7, 0, 1} |
  |            +----------------+--------------+
  |            | 2              | {7, 0, 1, 2} |
  |            +----------------+--------------+
  |            | 3              | {0, 1, 2, 3} |
  +------------+----------------+--------------+
  | '``rc8``'  | 0              | {0, 0, 0, 0} |
  |            +----------------+--------------+
  |            | 1              | {1, 1, 1, 1} |
  |            +----------------+--------------+
  |            | 2              | {2, 2, 2, 2} |
  |            +----------------+--------------+
  |            | 3              | {3, 3, 3, 3} |
  +------------+----------------+--------------+
  | '``ecl``'  | 0              | {3, 2, 1, 0} |
  |            +----------------+--------------+
  |            | 1              | {3, 2, 1, 1} |
  |            +----------------+--------------+
  |            | 2              | {3, 2, 2, 2} |
  |            +----------------+--------------+
  |            | 3              | {3, 3, 3, 3} |
  +------------+----------------+--------------+
  | '``ecr``'  | 0              | {0, 0, 0, 0} |
  |            +----------------+--------------+
  |            | 1              | {1, 1, 1, 0} |
  |            +----------------+--------------+
  |            | 2              | {2, 2, 1, 0} |
  |            +----------------+--------------+
  |            | 3              | {3, 2, 1, 0} |
  +------------+----------------+--------------+
  | '``rc16``' | 0              | {1, 0, 1, 0} |
  |            +----------------+--------------+
  |            | 1              | {3, 2, 3, 2} |
  |            +----------------+--------------+
  |            | 2              | {1, 0, 1, 0} |
  |            +----------------+--------------+
  |            | 3              | {3, 2, 3, 2} |
  +------------+----------------+--------------+

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-prmt)
  """

  OPERATION_NAME = "nvvm.prmt"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, lo, selector, mode, *, hi=None, loc=None, ip=None):
    """Create the op with result type `res`.

    Operands are `lo`, then `hi` only when it is not None, then `selector`.
    `mode` is always set as an attribute; a value that is not already an
    `Attribute` is converted via the `PermuteModeAttr` builder when one is
    registered.
    """
    # `hi` is optional and, when given, sits between `lo` and `selector`.
    operands = [lo, selector] if hi is None else [lo, hi, selector]
    _ods_context = _ods_get_default_loc_context(loc)

    def _as_attr(builder_name, value):
      # Ready-made attributes, and values with no registered builder, pass through as-is.
      if isinstance(value, _ods_ir.Attribute):
        return value
      if not _ods_ir.AttrBuilder.contains(builder_name):
        return value
      return _ods_ir.AttrBuilder.get(builder_name)(value, context=_ods_context)

    attributes = {"mode": _as_attr('PermuteModeAttr', mode)}

    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[res],
      operands=operands,
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def lo(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """The first (index 0) operand."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def hi(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """The operand at index 1 when at least three operands exist, else None."""
    op_operands = self.operation.operands
    if len(op_operands) < 3:
      return None
    return op_operands[1]

  @builtins.property
  def selector(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """The operand that follows `lo` and the optional `hi`."""
    op_operands = self.operation.operands
    # Operands beyond the two mandatory ones belong to the optional `hi` group.
    optional_count = len(op_operands) - 2
    return op_operands[1 + optional_count]

  @builtins.property
  def mode(self) -> _ods_ir.Attribute:
    """The mandatory `mode` attribute."""
    attrs = self.operation.attributes
    return attrs["mode"]

  @mode.setter
  def mode(self, value: _ods_ir.Attribute):
    # Mandatory attribute: it may be replaced but never cleared.
    if value is not None:
      self.operation.attributes["mode"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """The first (index 0) result of the operation."""
    op_results = self.operation.results
    return op_results[0]

def prmt(res, lo, selector, mode, *, hi=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `PermuteOp` from the given arguments and return its `.result`."""
  op = PermuteOp(
    res=res,
    lo=lo,
    selector=selector,
    mode=mode,
    hi=hi,
    loc=loc,
    ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class PrefetchOp(_ods_ir.OpView):
  r"""
  Prefetches the cache line containing the address given by `addr`. The 
  operand may be a global, local, or generic pointer. When `tensormap` is 
  specified, the operand may instead be a constant or generic pointer. If the 
  address maps to shared memory, the operation has no effect.
  
  At most one of `cacheLevel` or `tensormap` may be present. The `cacheLevel` 
  attribute selects the target cache level. When combined with `uniform`, the 
  prefetch is performed to the uniform cache, in which case `addr` must be a 
  generic pointer.
  
  When `tensormap` is used, the line containing `addr` is brought from the 
  constant or parameter state space for later use by `cp.async.bulk.tensor`. 
  If `in_param_space` is specified, the generic pointer is interpreted as 
  referring to the parameter state space.
  
  `uniform` can be specified after the `cacheLevel` to indicate that the 
  prefetch is performed to the specified uniform cache level. If `uniform` is 
  specified, `addr` must be a generic address pointer and no operation is 
  performed if `addr` maps to a `const`, `local`, or `shared` memory location.
  
  The `evictPriority` attribute is optional and specifies the cache eviction
  priority when `cacheLevel` is L2.
  
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-prefetch-prefetchu)
  """

  OPERATION_NAME = "nvvm.prefetch"

  _ODS_REGIONS = (0, True)

  def __init__(self, addr, *, cacheLevel=None, evictPriority=None, predicate=None, tensormap=None, uniform=None, in_param_space=None, loc=None, ip=None):
    """Build the operation.

    `addr` is operand 0 and `predicate` is appended only when it is not None.
    `cacheLevel` and `evictPriority` are set only when not None; each is
    stored as given if it is already an attribute or if the named builder is
    not registered, and converted by that builder otherwise. `tensormap`,
    `uniform` and `in_param_space` become unit attributes when truthy.
    """
    operands = [addr]
    if predicate is not None:
      operands.append(predicate)
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)
    if cacheLevel is not None:
      if isinstance(cacheLevel, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('PrefetchCacheLevelAttr'):
        attributes["cacheLevel"] = cacheLevel
      else:
        attributes["cacheLevel"] = _ods_ir.AttrBuilder.get('PrefetchCacheLevelAttr')(cacheLevel, context=_ods_context)
    if evictPriority is not None:
      if isinstance(evictPriority, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('CacheEvictionPriorityAttr'):
        attributes["evictPriority"] = evictPriority
      else:
        attributes["evictPriority"] = _ods_ir.AttrBuilder.get('CacheEvictionPriorityAttr')(evictPriority, context=_ods_context)
    if tensormap:
      attributes["tensormap"] = _ods_ir.UnitAttr.get(_ods_get_default_loc_context(loc))
    if uniform:
      attributes["uniform"] = _ods_ir.UnitAttr.get(_ods_get_default_loc_context(loc))
    if in_param_space:
      attributes["in_param_space"] = _ods_ir.UnitAttr.get(_ods_get_default_loc_context(loc))
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[],
      operands=operands,
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Return the `addr` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def predicate(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the optional `predicate` operand (operand 1), or None when absent."""
    operand_list = self.operation.operands
    if len(operand_list) >= 2:
      return operand_list[1]
    return None

  @builtins.property
  def cacheLevel(self) -> _Optional[_ods_ir.Attribute]:
    """Return the "cacheLevel" attribute, or None when it is not set."""
    attribute_map = self.operation.attributes
    return attribute_map["cacheLevel"] if "cacheLevel" in attribute_map else None

  @cacheLevel.setter
  def cacheLevel(self, value: _Optional[_ods_ir.Attribute]):
    """Set "cacheLevel" to `value`; None removes the attribute if present."""
    attribute_map = self.operation.attributes
    if value is None:
      if "cacheLevel" in attribute_map:
        del attribute_map["cacheLevel"]
    else:
      attribute_map["cacheLevel"] = value

  @cacheLevel.deleter
  def cacheLevel(self):
    """Delete the "cacheLevel" attribute."""
    attribute_map = self.operation.attributes
    del attribute_map["cacheLevel"]

  @builtins.property
  def evictPriority(self) -> _Optional[_ods_ir.Attribute]:
    """Return the "evictPriority" attribute, or None when it is not set."""
    attribute_map = self.operation.attributes
    return attribute_map["evictPriority"] if "evictPriority" in attribute_map else None

  @evictPriority.setter
  def evictPriority(self, value: _Optional[_ods_ir.Attribute]):
    """Set "evictPriority" to `value`; None removes the attribute if present."""
    attribute_map = self.operation.attributes
    if value is None:
      if "evictPriority" in attribute_map:
        del attribute_map["evictPriority"]
    else:
      attribute_map["evictPriority"] = value

  @evictPriority.deleter
  def evictPriority(self):
    """Delete the "evictPriority" attribute."""
    attribute_map = self.operation.attributes
    del attribute_map["evictPriority"]

  @builtins.property
  def tensormap(self) -> bool:
    """Return True when the "tensormap" unit attribute is present."""
    attribute_map = self.operation.attributes
    return "tensormap" in attribute_map

  @tensormap.setter
  def tensormap(self, value):
    """Add the "tensormap" unit attribute for a truthy `value`; otherwise remove it if present."""
    attribute_map = self.operation.attributes
    if value:
      attribute_map["tensormap"] = _ods_ir.UnitAttr.get()
    elif "tensormap" in attribute_map:
      del attribute_map["tensormap"]

  @tensormap.deleter
  def tensormap(self):
    """Delete the "tensormap" attribute."""
    attribute_map = self.operation.attributes
    del attribute_map["tensormap"]

  @builtins.property
  def uniform(self) -> bool:
    """Return True when the "uniform" unit attribute is present."""
    attribute_map = self.operation.attributes
    return "uniform" in attribute_map

  @uniform.setter
  def uniform(self, value):
    """Add the "uniform" unit attribute for a truthy `value`; otherwise remove it if present."""
    attribute_map = self.operation.attributes
    if value:
      attribute_map["uniform"] = _ods_ir.UnitAttr.get()
    elif "uniform" in attribute_map:
      del attribute_map["uniform"]

  @uniform.deleter
  def uniform(self):
    """Delete the "uniform" attribute."""
    attribute_map = self.operation.attributes
    del attribute_map["uniform"]

  @builtins.property
  def in_param_space(self) -> bool:
    """Return True when the "in_param_space" unit attribute is present."""
    attribute_map = self.operation.attributes
    return "in_param_space" in attribute_map

  @in_param_space.setter
  def in_param_space(self, value):
    """Add the "in_param_space" unit attribute for a truthy `value`; otherwise remove it if present."""
    attribute_map = self.operation.attributes
    if value:
      attribute_map["in_param_space"] = _ods_ir.UnitAttr.get()
    elif "in_param_space" in attribute_map:
      del attribute_map["in_param_space"]

  @in_param_space.deleter
  def in_param_space(self):
    """Delete the "in_param_space" attribute."""
    attribute_map = self.operation.attributes
    del attribute_map["in_param_space"]

def prefetch(addr, *, cache_level=None, evict_priority=None, predicate=None, tensormap=None, uniform=None, in_param_space=None, loc=None, ip=None) -> PrefetchOp:
  """Construct and return a `PrefetchOp`.

  The snake_case keywords `cache_level` and `evict_priority` are forwarded as
  the constructor's `cacheLevel` and `evictPriority`; all other arguments are
  passed through under their own names.
  """
  op = PrefetchOp(
    addr=addr,
    cacheLevel=cache_level,
    evictPriority=evict_priority,
    predicate=predicate,
    tensormap=tensormap,
    uniform=uniform,
    in_param_space=in_param_space,
    loc=loc,
    ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class RcpApproxFtzF32Op(_ods_ir.OpView):
  """Op view for the `nvvm.rcp.approx.ftz.f` operation (one operand, one result)."""

  OPERATION_NAME = "nvvm.rcp.approx.ftz.f"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, arg, *, loc=None, ip=None):
    """Build the operation with `arg` as its only operand and `res` as its only results entry."""
    # The context lookup result is not used further in this builder.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes={},
      results=[res],
      operands=[arg],
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def arg(self) -> _ods_ir.Value[_ods_ir.FloatType]:
    """Return the `arg` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def res(self) -> _ods_ir.OpResult[_ods_ir.FloatType]:
    """Return the op's first (index 0) result."""
    result_list = self.operation.results
    return result_list[0]

def rcp_approx_ftz_f(res, arg, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `RcpApproxFtzF32Op` and return its `.result`."""
  op = RcpApproxFtzF32Op(res=res, arg=arg, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ReduxOp(_ods_ir.OpView):
  r"""
  `redux.sync` performs a reduction operation `kind` of the 32 bit source 
  register across all non-exited threads in the membermask.
  
  The `abs` and `nan` attributes can be used in the case of f32 input type, 
  where the `abs` attribute causes the absolute value of the input to be used 
  in the reduction operation, and the `nan` attribute causes the reduction 
  operation to return NaN if any of the inputs to participating threads are 
  NaN.
  
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-redux-sync)
  """

  OPERATION_NAME = "nvvm.redux.sync"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, val, kind, mask_and_clamp, *, abs=None, nan=None, loc=None, ip=None):
    """Build the operation.

    Operands are `val` then `mask_and_clamp`; `res` is the single results
    entry. `kind` is always set; `abs` and `nan` are set only when not None.
    Each attribute value is stored as given if it is already an attribute or
    if the named builder is not registered, and converted by that builder
    otherwise.
    """
    operands = [val, mask_and_clamp]
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)
    if isinstance(kind, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('ReduxKindAttr'):
      attributes["kind"] = kind
    else:
      attributes["kind"] = _ods_ir.AttrBuilder.get('ReduxKindAttr')(kind, context=_ods_context)
    if abs is not None:
      if isinstance(abs, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('BoolAttr'):
        attributes["abs"] = abs
      else:
        attributes["abs"] = _ods_ir.AttrBuilder.get('BoolAttr')(abs, context=_ods_context)
    if nan is not None:
      if isinstance(nan, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('BoolAttr'):
        attributes["nan"] = nan
      else:
        attributes["nan"] = _ods_ir.AttrBuilder.get('BoolAttr')(nan, context=_ods_context)
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[res],
      operands=operands,
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def val(self) -> _ods_ir.Value:
    """Return the `val` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def mask_and_clamp(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the `mask_and_clamp` operand (operand 1)."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "kind" key."""
    attribute_map = self.operation.attributes
    return attribute_map["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Store `value` under the "kind" key; raise ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["kind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def abs(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "abs" key."""
    attribute_map = self.operation.attributes
    return attribute_map["abs"]

  @abs.setter
  def abs(self, value: _ods_ir.BoolAttr):
    """Store `value` under the "abs" key; raise ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["abs"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def nan(self) -> _ods_ir.BoolAttr:
    """Return the attribute stored under the "nan" key."""
    attribute_map = self.operation.attributes
    return attribute_map["nan"]

  @nan.setter
  def nan(self, value: _ods_ir.BoolAttr):
    """Store `value` under the "nan" key; raise ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["nan"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the op's first (index 0) result."""
    result_list = self.operation.results
    return result_list[0]

def redux_sync(res, val, kind, mask_and_clamp, *, abs=None, nan=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ReduxOp` from the given arguments and return its `.result`."""
  op = ReduxOp(
    res=res,
    val=val,
    kind=kind,
    mask_and_clamp=mask_and_clamp,
    abs=abs,
    nan=nan,
    loc=loc,
    ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class SetMaxRegisterOp(_ods_ir.OpView):
  """Op view for the `nvvm.setmaxregister` operation (no operands, no results, two attributes)."""

  OPERATION_NAME = "nvvm.setmaxregister"

  _ODS_REGIONS = (0, True)

  def __init__(self, regCount, action, *, loc=None, ip=None):
    """Build the operation with the mandatory `regCount` and `action` attributes.

    Each value is stored as given if it is already an attribute or if the
    named builder ('I32Attr' / 'SetMaxRegisterActionAttr') is not registered,
    and converted by that builder otherwise.
    """
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)
    if isinstance(regCount, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('I32Attr'):
      attributes["regCount"] = regCount
    else:
      attributes["regCount"] = _ods_ir.AttrBuilder.get('I32Attr')(regCount, context=_ods_context)
    if isinstance(action, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('SetMaxRegisterActionAttr'):
      attributes["action"] = action
    else:
      attributes["action"] = _ods_ir.AttrBuilder.get('SetMaxRegisterActionAttr')(action, context=_ods_context)
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[],
      operands=[],
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def regCount(self) -> _ods_ir.IntegerAttr:
    """Return the attribute stored under the "regCount" key."""
    attribute_map = self.operation.attributes
    return attribute_map["regCount"]

  @regCount.setter
  def regCount(self, value: _ods_ir.IntegerAttr):
    """Store `value` under the "regCount" key; raise ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["regCount"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def action(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "action" key."""
    attribute_map = self.operation.attributes
    return attribute_map["action"]

  @action.setter
  def action(self, value: _ods_ir.Attribute):
    """Store `value` under the "action" key; raise ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["action"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def setmaxregister(reg_count, action, *, loc=None, ip=None) -> SetMaxRegisterOp:
  """Construct and return a `SetMaxRegisterOp`; `reg_count` is forwarded as `regCount`."""
  op = SetMaxRegisterOp(regCount=reg_count, action=action, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class ShflOp(_ods_ir.OpView):
  r"""
  The `shfl.sync` Op implements data shuffle within threads of a warp.
  The `thread_mask` denotes the threads participating in the Op where
  the bit position corresponds to a particular thread's laneid.
  The `offset` specifies a source lane or source lane offset
  (depending on `kind`). The `val` is the input value to be copied from
  the source. The `mask_and_clamp` contains two packed values specifying
  a mask for logically splitting warps into sub-segments and an upper bound
  for clamping the source lane index.
  
  The `return_value_and_is_valid` unit attribute can be specified to indicate 
  that the return value is a two-element struct, where the first element is 
  the result value and the second element is a predicate indicating if the 
  computed source lane index is valid.
  
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-shfl-sync)
  """

  OPERATION_NAME = "nvvm.shfl.sync"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, thread_mask, val, offset, mask_and_clamp, kind, *, return_value_and_is_valid=None, loc=None, ip=None):
    """Build the operation.

    Operands, in order: `thread_mask`, `val`, `offset`, `mask_and_clamp`.
    `res` is the single results entry. `kind` is stored as given if it is
    already an attribute or if no 'ShflKindAttr' builder is registered, and
    converted by that builder otherwise. A truthy
    `return_value_and_is_valid` adds the unit attribute of the same name.
    """
    operands = [thread_mask, val, offset, mask_and_clamp]
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)
    if isinstance(kind, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('ShflKindAttr'):
      attributes["kind"] = kind
    else:
      attributes["kind"] = _ods_ir.AttrBuilder.get('ShflKindAttr')(kind, context=_ods_context)
    if return_value_and_is_valid:
      attributes["return_value_and_is_valid"] = _ods_ir.UnitAttr.get(_ods_get_default_loc_context(loc))
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[res],
      operands=operands,
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def thread_mask(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the `thread_mask` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def val(self) -> _ods_ir.Value:
    """Return the `val` operand (operand 1)."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def offset(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the `offset` operand (operand 2)."""
    operand_list = self.operation.operands
    return operand_list[2]

  @builtins.property
  def mask_and_clamp(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the `mask_and_clamp` operand (operand 3)."""
    operand_list = self.operation.operands
    return operand_list[3]

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "kind" key."""
    attribute_map = self.operation.attributes
    return attribute_map["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Store `value` under the "kind" key; raise ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["kind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def return_value_and_is_valid(self) -> bool:
    """Return True when the "return_value_and_is_valid" unit attribute is present."""
    attribute_map = self.operation.attributes
    return "return_value_and_is_valid" in attribute_map

  @return_value_and_is_valid.setter
  def return_value_and_is_valid(self, value):
    """Add the unit attribute for a truthy `value`; otherwise remove it if present."""
    attribute_map = self.operation.attributes
    if value:
      attribute_map["return_value_and_is_valid"] = _ods_ir.UnitAttr.get()
    elif "return_value_and_is_valid" in attribute_map:
      del attribute_map["return_value_and_is_valid"]

  @return_value_and_is_valid.deleter
  def return_value_and_is_valid(self):
    """Delete the "return_value_and_is_valid" attribute."""
    attribute_map = self.operation.attributes
    del attribute_map["return_value_and_is_valid"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the op's first (index 0) result."""
    result_list = self.operation.results
    return result_list[0]

def shfl_sync(res, thread_mask, val, offset, mask_and_clamp, kind, *, return_value_and_is_valid=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ShflOp` from the given arguments and return its `.result`."""
  op = ShflOp(
    res=res,
    thread_mask=thread_mask,
    val=val,
    offset=offset,
    mask_and_clamp=mask_and_clamp,
    kind=kind,
    return_value_and_is_valid=return_value_and_is_valid,
    loc=loc,
    ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class SmDimOp(_ods_ir.OpView):
  """Op view for the `nvvm.read.ptx.sreg.nsmid` operation (no operands, one result, optional `range`)."""

  OPERATION_NAME = "nvvm.read.ptx.sreg.nsmid"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the operation with `res` as the single results entry.

    `range` is set only when not None; it is stored as given if it is already
    an attribute or if no 'LLVM_ConstantRangeAttr' builder is registered, and
    converted by that builder otherwise.
    """
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        attributes["range"] = range
      else:
        attributes["range"] = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=_ods_context)
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[res],
      operands=[],
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """Return the "range" attribute, or None when it is not set."""
    attribute_map = self.operation.attributes
    return attribute_map["range"] if "range" in attribute_map else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set "range" to `value`; None removes the attribute if present."""
    attribute_map = self.operation.attributes
    if value is None:
      if "range" in attribute_map:
        del attribute_map["range"]
    else:
      attribute_map["range"] = value

  @range.deleter
  def range(self):
    """Delete the "range" attribute."""
    attribute_map = self.operation.attributes
    del attribute_map["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the op's first (index 0) result."""
    result_list = self.operation.results
    return result_list[0]

def read_ptx_sreg_nsmid(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `SmDimOp` and return its `.result`."""
  op = SmDimOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class SmIdOp(_ods_ir.OpView):
  """Op view for the `nvvm.read.ptx.sreg.smid` operation (no operands, one result, optional `range`)."""

  OPERATION_NAME = "nvvm.read.ptx.sreg.smid"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Build the operation with `res` as the single results entry.

    `range` is set only when not None; it is stored as given if it is already
    an attribute or if no 'LLVM_ConstantRangeAttr' builder is registered, and
    converted by that builder otherwise.
    """
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)
    if range is not None:
      if isinstance(range, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr'):
        attributes["range"] = range
      else:
        attributes["range"] = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=_ods_context)
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[res],
      operands=[],
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """Return the "range" attribute, or None when it is not set."""
    attribute_map = self.operation.attributes
    return attribute_map["range"] if "range" in attribute_map else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set "range" to `value`; None removes the attribute if present."""
    attribute_map = self.operation.attributes
    if value is None:
      if "range" in attribute_map:
        del attribute_map["range"]
    else:
      attribute_map["range"] = value

  @range.deleter
  def range(self):
    """Delete the "range" attribute."""
    attribute_map = self.operation.attributes
    del attribute_map["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the op's first (index 0) result."""
    result_list = self.operation.results
    return result_list[0]

def read_ptx_sreg_smid(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct an `SmIdOp` and return its `.result`."""
  op = SmIdOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class StMatrixOp(_ods_ir.OpView):
  r"""
  Collectively store one or more matrices across all threads in a warp to the
  location indicated by the address operand $ptr in shared memory.
  
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-store-instruction-stmatrix)
  """

  OPERATION_NAME = "nvvm.stmatrix"

  _ODS_REGIONS = (0, True)

  def __init__(self, ptr, sources, layout, shape, eltType, *, loc=None, ip=None):
    """Build the operation.

    `ptr` is operand 0, followed by the values obtained from `sources` via
    `_get_op_results_or_values`. `layout`, `shape` and `eltType` are
    mandatory attributes; each is stored as given if it is already an
    attribute or if the named builder is not registered, and converted by
    that builder otherwise. The op has no results.
    """
    operands = [ptr]
    operands.extend(_get_op_results_or_values(sources))
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)
    if isinstance(layout, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('MMALayoutAttr'):
      attributes["layout"] = layout
    else:
      attributes["layout"] = _ods_ir.AttrBuilder.get('MMALayoutAttr')(layout, context=_ods_context)
    if isinstance(shape, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LdStMatrixShapeAttr'):
      attributes["shape"] = shape
    else:
      attributes["shape"] = _ods_ir.AttrBuilder.get('LdStMatrixShapeAttr')(shape, context=_ods_context)
    if isinstance(eltType, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('LdStMatrixEltTypeAttr'):
      attributes["eltType"] = eltType
    else:
      attributes["eltType"] = _ods_ir.AttrBuilder.get('LdStMatrixEltTypeAttr')(eltType, context=_ods_context)
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[],
      operands=operands,
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def ptr(self) -> _ods_ir.Value:
    """Return the `ptr` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def sources(self) -> _ods_ir.OpOperandList:
    """Return the variadic `sources` operands: every operand after `ptr`."""
    operand_list = self.operation.operands
    # Everything except the single leading `ptr` operand belongs to the group.
    variadic_count = len(operand_list) - 1
    return operand_list[1:1 + variadic_count]

  @builtins.property
  def layout(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "layout" key."""
    attribute_map = self.operation.attributes
    return attribute_map["layout"]

  @layout.setter
  def layout(self, value: _ods_ir.Attribute):
    """Store `value` under the "layout" key; raise ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["layout"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def shape(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "shape" key."""
    attribute_map = self.operation.attributes
    return attribute_map["shape"]

  @shape.setter
  def shape(self, value: _ods_ir.Attribute):
    """Store `value` under the "shape" key; raise ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["shape"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def eltType(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "eltType" key."""
    attribute_map = self.operation.attributes
    return attribute_map["eltType"]

  @eltType.setter
  def eltType(self, value: _ods_ir.Attribute):
    """Store `value` under the "eltType" key; raise ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["eltType"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def stmatrix(ptr, sources, layout, shape, elt_type, *, loc=None, ip=None) -> StMatrixOp:
  """Construct and return an `StMatrixOp`; `elt_type` is forwarded as `eltType`."""
  op = StMatrixOp(
    ptr=ptr,
    sources=sources,
    layout=layout,
    shape=shape,
    eltType=elt_type,
    loc=loc,
    ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class SyncWarpOp(_ods_ir.OpView):
  r"""
  The `nvvm.bar.warp.sync` operation performs barrier synchronization for threads 
  within a warp. 
  
  This operation causes the executing thread to wait until all threads corresponding 
  to the `mask` operand have executed a `bar.warp.sync` with the same mask value 
  before resuming execution.
  
  The `mask` operand specifies the threads participating in the barrier, where each 
  bit position corresponds to the thread's lane ID within the warp. Only threads with 
  their corresponding bit set in the mask participate in the barrier synchronization.
  
  **Important constraints**:
  - The behavior is undefined if the executing thread is not included in the mask 
    (i.e., the bit corresponding to the thread's lane ID is not set)
  - For compute capability sm_6x or below, all threads in the mask must execute 
    the same `bar.warp.sync` instruction in convergence
  
  This operation also guarantees memory ordering among participating threads. 
  Threads within the warp that wish to communicate via memory can store to memory, 
  execute `bar.warp.sync`, and then safely read values stored by other threads 
  in the warp.
  
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar-warp-sync)
  """

  OPERATION_NAME = "nvvm.bar.warp.sync"

  _ODS_REGIONS = (0, True)

  def __init__(self, mask, *, loc=None, ip=None):
    """Build the operation with `mask` as its only operand; it has no attributes or results."""
    # The context lookup result is not used further in this builder.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes={},
      results=[],
      operands=[mask],
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def mask(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the `mask` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

def bar_warp_sync(mask, *, loc=None, ip=None) -> SyncWarpOp:
  """Construct and return a `SyncWarpOp` for the given `mask`."""
  op = SyncWarpOp(mask=mask, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05AllocOp(_ods_ir.OpView):
  r"""
  The `tcgen05.alloc` Op allocates tensor core memory for
  the amount specified by `nCols` and writes the destination
  address to the `addr` argument. The `nCols` operand specifies the
  number of columns to be allocated and it must be a power-of-two.
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
  """

  OPERATION_NAME = "nvvm.tcgen05.alloc"

  _ODS_REGIONS = (0, True)

  def __init__(self, addr, nCols, *, group=None, loc=None, ip=None):
    """Build the operation.

    Operands are `addr` then `nCols`; the op has no results. `group` is set
    only when not None; it is stored as given if it is already an attribute
    or if no 'CTAGroupKindAttr' builder is registered, and converted by that
    builder otherwise.
    """
    operands = [addr, nCols]
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)
    if group is not None:
      if isinstance(group, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('CTAGroupKindAttr'):
        attributes["group"] = group
      else:
        attributes["group"] = _ods_ir.AttrBuilder.get('CTAGroupKindAttr')(group, context=_ods_context)
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[],
      operands=operands,
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Return the `addr` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def nCols(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the `nCols` operand (operand 1)."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def group(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "group" key."""
    attribute_map = self.operation.attributes
    return attribute_map["group"]

  @group.setter
  def group(self, value: _ods_ir.Attribute):
    """Store `value` under the "group" key; raise ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["group"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def tcgen05_alloc(addr, n_cols, *, group=None, loc=None, ip=None) -> Tcgen05AllocOp:
  """Construct and return a `Tcgen05AllocOp`; `n_cols` is forwarded as `nCols`."""
  op = Tcgen05AllocOp(addr=addr, nCols=n_cols, group=group, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05CommitOp(_ods_ir.OpView):
  r"""
  The `tcgen05.commit` makes the *mbarrier object*, specified by
  the operand `addr`, track the completion of all the prior
  async-tcgen05 operations initiated by the executing thread.
  The multicast variants allow signaling on the *mbarrier objects*
  of multiple CTAs within the cluster. Operand `multicastMask`,
  when present, specifies the destination CTAs in the cluster such
  that each bit position in the 16-bit `multicastMask` operand
  corresponds to the `nvvm.read.ptx.sreg.ctaid` of the destination CTA.
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen-async-sync-operations-commit)
  """

  OPERATION_NAME = "nvvm.tcgen05.commit"

  _ODS_REGIONS = (0, True)

  def __init__(self, addr, *, multicastMask=None, group=None, loc=None, ip=None):
    """Build the operation.

    `addr` is operand 0 and `multicastMask` is appended only when it is not
    None; the op has no results. `group` is set only when not None; it is
    stored as given if it is already an attribute or if no 'CTAGroupKindAttr'
    builder is registered, and converted by that builder otherwise.
    """
    operands = [addr]
    if multicastMask is not None:
      operands.append(multicastMask)
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)
    if group is not None:
      if isinstance(group, _ods_ir.Attribute) or not _ods_ir.AttrBuilder.contains('CTAGroupKindAttr'):
        attributes["group"] = group
      else:
        attributes["group"] = _ods_ir.AttrBuilder.get('CTAGroupKindAttr')(group, context=_ods_context)
    super().__init__(
      self.OPERATION_NAME,
      self._ODS_REGIONS,
      self._ODS_OPERAND_SEGMENTS,
      self._ODS_RESULT_SEGMENTS,
      attributes=attributes,
      results=[],
      operands=operands,
      successors=None,
      regions=None,
      loc=loc,
      ip=ip,
    )

  @builtins.property
  def addr(self) -> _ods_ir.Value:
    """Return the `addr` operand (operand 0)."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def multicastMask(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the optional `multicastMask` operand (operand 1), or None when absent."""
    operand_list = self.operation.operands
    if len(operand_list) >= 2:
      return operand_list[1]
    return None

  @builtins.property
  def group(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the "group" key."""
    attribute_map = self.operation.attributes
    return attribute_map["group"]

  @group.setter
  def group(self, value: _ods_ir.Attribute):
    """Store `value` under the "group" key; raise ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["group"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def tcgen05_commit(addr, *, multicast_mask=None, group=None, loc=None, ip=None) -> Tcgen05CommitOp:
  """Construct and return a `Tcgen05CommitOp`; `multicast_mask` is forwarded as `multicastMask`."""
  op = Tcgen05CommitOp(
    addr=addr,
    multicastMask=multicast_mask,
    group=group,
    loc=loc,
    ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05CpOp(_ods_ir.OpView):
  r"""
  Instruction tcgen05.cp initiates an asynchronous copy operation from
  shared memory to the location specified by the address operand `taddr`
  in the Tensor Memory. The 64-bit register operand `smem_desc` specifies
  the matrix descriptor representing the source matrix in the shared memory
  that needs to be copied.

  Example:
  ```mlir
    nvvm.tcgen05.cp %taddr, %smem_desc {
      group = #nvvm.tcgen05_group<cta_2>,
      shape = #nvvm.tcgen05_cp_shape<shape_64x128b>,
      multicast = #nvvm.tcgen05_cp_multicast<warpx2_01_23>,
      srcFormat = #nvvm.tcgen05_cp_src_fmt<b6x16_p32>
    }
  ```
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tensorcore-5th-generation-instructions-tcgen05-cp)
  """

  OPERATION_NAME = "nvvm.tcgen05.cp"

  _ODS_REGIONS = (0, True)

  def __init__(self, shape, taddr, smem_desc, *, group=None, multicast=None, srcFormat=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.cp` operation.

    Args:
      shape: Value for the `shape` attribute (always set).
      taddr: Value used as operand 0.
      smem_desc: Value used as operand 1.
      group: Optional value for the `group` attribute.
      multicast: Optional value for the `multicast` attribute.
      srcFormat: Optional value for the `srcFormat` attribute.
      loc: Optional location; also used to resolve the context.
      ip: Optional insertion point.

    Attribute values that are not already `Attribute` instances are converted
    with the matching registered attribute builder when one exists; otherwise
    they are stored as given.
    """
    operands = [taddr, smem_desc]
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}

    # `shape` is stored unconditionally.
    if (not isinstance(shape, _ods_ir.Attribute)
        and _ods_ir.AttrBuilder.contains('Tcgen05CpShapeAttr')):
      shape = _ods_ir.AttrBuilder.get('Tcgen05CpShapeAttr')(shape, context=_ods_context)
    attributes["shape"] = shape

    # The remaining attributes are only stored when a value was supplied.
    if group is not None:
      if (not isinstance(group, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('CTAGroupKindAttr')):
        group = _ods_ir.AttrBuilder.get('CTAGroupKindAttr')(group, context=_ods_context)
      attributes["group"] = group

    if multicast is not None:
      if (not isinstance(multicast, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('Tcgen05CpMulticastAttr')):
        multicast = _ods_ir.AttrBuilder.get('Tcgen05CpMulticastAttr')(multicast, context=_ods_context)
      attributes["multicast"] = multicast

    if srcFormat is not None:
      if (not isinstance(srcFormat, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('Tcgen05CpSrcFormatAttr')):
        srcFormat = _ods_ir.AttrBuilder.get('Tcgen05CpSrcFormatAttr')(srcFormat, context=_ods_context)
      attributes["srcFormat"] = srcFormat

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def taddr(self) -> _ods_ir.Value:
    """Return the operand at index 0 (`taddr`)."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def smem_desc(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the operand at index 1 (`smem_desc`)."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def shape(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `shape` key."""
    attrs = self.operation.attributes
    return attrs["shape"]

  @shape.setter
  def shape(self, value: _ods_ir.Attribute):
    """Store `value` under `shape`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["shape"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def group(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `group` key."""
    attrs = self.operation.attributes
    return attrs["group"]

  @group.setter
  def group(self, value: _ods_ir.Attribute):
    """Store `value` under `group`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["group"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def multicast(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `multicast` key."""
    attrs = self.operation.attributes
    return attrs["multicast"]

  @multicast.setter
  def multicast(self, value: _ods_ir.Attribute):
    """Store `value` under `multicast`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["multicast"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def srcFormat(self) -> _Optional[_ods_ir.Attribute]:
    """Return the `srcFormat` attribute, or None when it is not present."""
    attrs = self.operation.attributes
    if "srcFormat" in attrs:
      return attrs["srcFormat"]
    return None

  @srcFormat.setter
  def srcFormat(self, value: _Optional[_ods_ir.Attribute]):
    """Set `srcFormat` to `value`; passing None removes it if present."""
    attrs = self.operation.attributes
    if value is None:
      if "srcFormat" in attrs:
        del attrs["srcFormat"]
      return
    attrs["srcFormat"] = value

  @srcFormat.deleter
  def srcFormat(self):
    """Delete the `srcFormat` attribute entry."""
    attrs = self.operation.attributes
    del attrs["srcFormat"]

def tcgen05_cp(shape, taddr, smem_desc, *, group=None, multicast=None, src_format=None, loc=None, ip=None) -> Tcgen05CpOp:
  """Construct a `Tcgen05CpOp` from snake_case arguments and return the op view."""
  op = Tcgen05CpOp(
      shape=shape,
      taddr=taddr,
      smem_desc=smem_desc,
      group=group,
      multicast=multicast,
      srcFormat=src_format,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05DeallocOp(_ods_ir.OpView):
  r"""
  The `tcgen05.dealloc` Op de-allocates the tensor core memory
  specified by `taddr`, which must be from a previous tensor
  memory allocation. The `nCols` operand specifies the number
  of columns to be de-allocated, and it must be a power-of-two.
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
  """

  OPERATION_NAME = "nvvm.tcgen05.dealloc"

  _ODS_REGIONS = (0, True)

  def __init__(self, taddr, nCols, *, group=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.dealloc` operation.

    Args:
      taddr: Value used as operand 0.
      nCols: Value used as operand 1.
      group: Optional value for the `group` attribute. An `Attribute` is
        stored as given; any other value is converted with the registered
        `CTAGroupKindAttr` builder when one exists, otherwise stored as given.
      loc: Optional location; also used to resolve the context.
      ip: Optional insertion point.
    """
    operands = [taddr, nCols]
    _ods_context = _ods_get_default_loc_context(loc)

    attributes = {}
    if group is not None:
      if (not isinstance(group, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('CTAGroupKindAttr')):
        group = _ods_ir.AttrBuilder.get('CTAGroupKindAttr')(group, context=_ods_context)
      attributes["group"] = group

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def taddr(self) -> _ods_ir.Value:
    """Return the operand at index 0 (`taddr`)."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def nCols(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the operand at index 1 (`nCols`)."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def group(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `group` key."""
    attrs = self.operation.attributes
    return attrs["group"]

  @group.setter
  def group(self, value: _ods_ir.Attribute):
    """Store `value` under `group`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["group"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def tcgen05_dealloc(taddr, n_cols, *, group=None, loc=None, ip=None) -> Tcgen05DeallocOp:
  """Construct a `Tcgen05DeallocOp` from snake_case arguments and return the op view."""
  op = Tcgen05DeallocOp(
      taddr=taddr,
      nCols=n_cols,
      group=group,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05FenceOp(_ods_ir.OpView):
  r"""
  The `tcgen05.fence<before>` orders all prior async tcgen05 operations
  with respect to the subsequent tcgen05 and execution ordering operations.
  The `tcgen05.fence<after>` orders all subsequent async tcgen05 operations
  with respect to the prior tcgen05 and execution ordering operations.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tensorcore-5th-generation-instructions-tcgen05-fence)
  """

  OPERATION_NAME = "nvvm.tcgen05.fence"

  _ODS_REGIONS = (0, True)

  def __init__(self, kind, *, loc=None, ip=None):
    """Build an `nvvm.tcgen05.fence` operation.

    Args:
      kind: Value for the `kind` attribute (always set). An `Attribute` is
        stored as given; any other value is converted with the registered
        `Tcgen05FenceKindAttr` builder when one exists, otherwise stored as
        given.
      loc: Optional location; also used to resolve the context.
      ip: Optional insertion point.
    """
    _ods_context = _ods_get_default_loc_context(loc)

    if (not isinstance(kind, _ods_ir.Attribute)
        and _ods_ir.AttrBuilder.contains('Tcgen05FenceKindAttr')):
      kind = _ods_ir.AttrBuilder.get('Tcgen05FenceKindAttr')(kind, context=_ods_context)
    attributes = {"kind": kind}

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `kind` key."""
    attrs = self.operation.attributes
    return attrs["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Store `value` under `kind`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["kind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def tcgen05_fence(kind, *, loc=None, ip=None) -> Tcgen05FenceOp:
  """Construct a `Tcgen05FenceOp` with the given `kind` and return the op view."""
  op = Tcgen05FenceOp(kind=kind, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05LdOp(_ods_ir.OpView):
  r"""
  Instruction `tcgen05.ld` asynchronously loads data from the Tensor Memory at
  the location specified by the 32-bit address operand `tmemAddr` into the
  destination register `res`, collectively across all threads of the warps.

  The `shape` and the `num` attribute together determine the total
  dimension of the data which is loaded from the Tensor Memory. The `shape`
  attribute indicates the base dimension of data to be accessed as described
  in the Data Movement Shape. The `num` attribute indicates the repeat
  factor on the base dimension resulting in the total dimension of the data
  that is accessed.

  The shape `16x32bx2` performs two accesses into Tensor Memory of the shape
  `16x32b`. The base address of the first access is specified by `tmemAddr`
  and the base address of the second access is specified by
  `tmemAddr + offset`, where `offset` is an immediate argument.

  The unit attribute `pack` can be used to pack two 16-bit
  elements from adjacent columns into a single 32-bit element during the load.

  The following table describes the size of the vector for various combinations
  of `num` and `shape` attributes:
  ```
  |=====================================================================|
  | num/shape      |     16x32bx2/16x64b/32x32b |  16x128b   | 16x256b  |
  |=====================================================================|
  | x1             |          1                 |    2       |    4     |
  | x2             |          2                 |    4       |    8     |
  | x4             |          4                 |    8       |    16    |
  | x8             |          8                 |    16      |    32    |
  | x16            |          16                |    32      |    64    |
  | x32            |          32                |    64      |    128   |
  | x64            |          64                |    128     |    NA    |
  | x128           |          128               |    NA      |    NA    |
  |=====================================================================|
  ```

  Example:
  ```mlir
    nvvm.tcgen05.ld %tmemAddr, %offset pack {
      shape = #nvvm.tcgen05_ldst_shape<shape_16x32bx2>,
    } : <2xi32>
  ```

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-ld)
  """

  OPERATION_NAME = "nvvm.tcgen05.ld"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, shape, tmemAddr, *, pack=None, offset=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.ld` operation.

    Args:
      res: Result type entry; it is the only element of the results list.
      shape: Value for the `shape` attribute (always set). An `Attribute` is
        stored as given; any other value is converted with the registered
        `Tcgen05LdStShapeAttr` builder when one exists, otherwise stored as
        given.
      tmemAddr: Value used as operand 0.
      pack: When truthy, a unit attribute is stored under `pack`.
      offset: Optional value; appended as operand 1 when it is not None.
      loc: Optional location; also used to resolve the context.
      ip: Optional insertion point.
    """
    operands = [tmemAddr]
    if offset is not None:
      operands.append(offset)

    # Resolve the context once and reuse it for every attribute built below.
    _ods_context = _ods_get_default_loc_context(loc)

    attributes = {}
    if bool(pack):
      attributes["pack"] = _ods_ir.UnitAttr.get(_ods_context)

    if (not isinstance(shape, _ods_ir.Attribute)
        and _ods_ir.AttrBuilder.contains('Tcgen05LdStShapeAttr')):
      shape = _ods_ir.AttrBuilder.get('Tcgen05LdStShapeAttr')(shape, context=_ods_context)
    attributes["shape"] = shape

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[res],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def tmemAddr(self) -> _ods_ir.Value:
    """Return the operand at index 0 (`tmemAddr`)."""
    return self.operation.operands[0]

  @builtins.property
  def offset(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand at index 1, or None when fewer than two operands exist."""
    op_operands = self.operation.operands
    if len(op_operands) < 2:
      return None
    return op_operands[1]

  @builtins.property
  def pack(self) -> bool:
    """Return True when a `pack` attribute is present on the operation."""
    return "pack" in self.operation.attributes

  @pack.setter
  def pack(self, value):
    """Add the `pack` unit attribute when `value` is truthy, else remove it."""
    attrs = self.operation.attributes
    if bool(value):
      # Build the attribute in the operation's own context instead of the
      # ambient thread-local one, so the setter also works when no context
      # manager is active (or a different context is current).
      attrs["pack"] = _ods_ir.UnitAttr.get(self.operation.context)
    elif "pack" in attrs:
      del attrs["pack"]

  @pack.deleter
  def pack(self):
    """Delete the `pack` attribute entry."""
    del self.operation.attributes["pack"]

  @builtins.property
  def shape(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `shape` key."""
    return self.operation.attributes["shape"]

  @shape.setter
  def shape(self, value: _ods_ir.Attribute):
    """Store `value` under `shape`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["shape"] = value

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """Return the result at index 0 (`res`)."""
    return self.operation.results[0]

def tcgen05_ld(res, shape, tmem_addr, *, pack=None, offset=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `Tcgen05LdOp` from snake_case arguments and return its `.result`."""
  op = Tcgen05LdOp(
      res=res,
      shape=shape,
      tmemAddr=tmem_addr,
      pack=pack,
      offset=offset,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class Tcgen05MMABlockScaleOp(_ods_ir.OpView):
  r"""
  The `tcgen05.mma.block_scale` operation is an asynchronous tensor core instruction
  that performs matrix multiplication, accumulation with block scaling in a
  single fused operation. It targets 5th-generation tensor cores, providing
  developers with fine-grained control over execution and scheduling.

  ```
  D = (A * scale_a)  * (B * scale_b)      // if `enableInputD` is false
  D = (A * scale_a)  * (B * scale_b) + D  // otherwise
  ```

  where:
  - A is an M x (K / 2) matrix in tensor memory or described using shared memory descriptor
  - B is a K x N matrix described using shared memory descriptor
  - D is an M x N accumulator matrix in tensor memory
  - `scale_a` and `scale_b` are matrices in tensor memory used to scale `A` and `B` respectively

  The `shared memory descriptor` can be generated using `tcgen05.mma_smem_desc` Op

  - `idesc` is a 32 bit value representing the [Instruction Descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instruction-descriptor)

  Required Attributes:
  - `kind` is a MMABlockScaleKind attribute

  - `ctaGroup` specifies CTA group configuration
    * cta_1: MMA will be performed on the current thread's CTA
    * cta_2: MMA will be performed on the current thread and its peer CTA

  Default Attributes:
  - collectorOp is a Tcgen05MMACollectorOp attribute with matrix A as the collector buffer

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma)
  """

  OPERATION_NAME = "nvvm.tcgen05.mma.block_scale"

  _ODS_REGIONS = (0, True)

  def __init__(self, kind, ctaGroup, matrixD, matrixA, matrixB, idesc, enableInputD, scaleA, scaleB, *, blockScale=None, collectorOp=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.mma.block_scale` operation.

    Args:
      kind: Value for the `kind` attribute (always set).
      ctaGroup: Value for the `ctaGroup` attribute (always set).
      matrixD, matrixA, matrixB, idesc, enableInputD, scaleA, scaleB: Values
        used as operands 0 through 6, in that order.
      blockScale: Optional value for the `blockScale` attribute.
      collectorOp: Optional value for the `collectorOp` attribute.
      loc: Optional location; also used to resolve the context.
      ip: Optional insertion point.

    Attribute values that are not already `Attribute` instances are converted
    with the matching registered attribute builder when one exists; otherwise
    they are stored as given.
    """
    operands = [matrixD, matrixA, matrixB, idesc, enableInputD, scaleA, scaleB]
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}

    # `kind` and `ctaGroup` are stored unconditionally.
    if (not isinstance(kind, _ods_ir.Attribute)
        and _ods_ir.AttrBuilder.contains('MMABlockScaleKindAttr')):
      kind = _ods_ir.AttrBuilder.get('MMABlockScaleKindAttr')(kind, context=_ods_context)
    attributes["kind"] = kind

    if (not isinstance(ctaGroup, _ods_ir.Attribute)
        and _ods_ir.AttrBuilder.contains('CTAGroupKindAttr')):
      ctaGroup = _ods_ir.AttrBuilder.get('CTAGroupKindAttr')(ctaGroup, context=_ods_context)
    attributes["ctaGroup"] = ctaGroup

    # The remaining attributes are only stored when a value was supplied.
    if blockScale is not None:
      if (not isinstance(blockScale, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('Tcgen05MMABlockScaleAttr')):
        blockScale = _ods_ir.AttrBuilder.get('Tcgen05MMABlockScaleAttr')(blockScale, context=_ods_context)
      attributes["blockScale"] = blockScale

    if collectorOp is not None:
      if (not isinstance(collectorOp, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('Tcgen05MMACollectorOpAttr')):
        collectorOp = _ods_ir.AttrBuilder.get('Tcgen05MMACollectorOpAttr')(collectorOp, context=_ods_context)
      attributes["collectorOp"] = collectorOp

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def matrixD(self) -> _ods_ir.Value:
    """Return the operand at index 0 (`matrixD`)."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def matrixA(self) -> _ods_ir.Value:
    """Return the operand at index 1 (`matrixA`)."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def matrixB(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the operand at index 2 (`matrixB`)."""
    op_operands = self.operation.operands
    return op_operands[2]

  @builtins.property
  def idesc(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the operand at index 3 (`idesc`)."""
    op_operands = self.operation.operands
    return op_operands[3]

  @builtins.property
  def enableInputD(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the operand at index 4 (`enableInputD`)."""
    op_operands = self.operation.operands
    return op_operands[4]

  @builtins.property
  def scaleA(self) -> _ods_ir.Value:
    """Return the operand at index 5 (`scaleA`)."""
    op_operands = self.operation.operands
    return op_operands[5]

  @builtins.property
  def scaleB(self) -> _ods_ir.Value:
    """Return the operand at index 6 (`scaleB`)."""
    op_operands = self.operation.operands
    return op_operands[6]

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `kind` key."""
    attrs = self.operation.attributes
    return attrs["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Store `value` under `kind`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["kind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def ctaGroup(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `ctaGroup` key."""
    attrs = self.operation.attributes
    return attrs["ctaGroup"]

  @ctaGroup.setter
  def ctaGroup(self, value: _ods_ir.Attribute):
    """Store `value` under `ctaGroup`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["ctaGroup"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def blockScale(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `blockScale` key."""
    attrs = self.operation.attributes
    return attrs["blockScale"]

  @blockScale.setter
  def blockScale(self, value: _ods_ir.Attribute):
    """Store `value` under `blockScale`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["blockScale"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def collectorOp(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `collectorOp` key."""
    attrs = self.operation.attributes
    return attrs["collectorOp"]

  @collectorOp.setter
  def collectorOp(self, value: _ods_ir.Attribute):
    """Store `value` under `collectorOp`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["collectorOp"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def tcgen05_mma_block_scale(kind, cta_group, matrix_d, matrix_a, matrix_b, idesc, enable_input_d, scale_a, scale_b, *, block_scale=None, collector_op=None, loc=None, ip=None) -> Tcgen05MMABlockScaleOp:
  """Construct a `Tcgen05MMABlockScaleOp` from snake_case arguments and return the op view."""
  op = Tcgen05MMABlockScaleOp(
      kind=kind,
      ctaGroup=cta_group,
      matrixD=matrix_d,
      matrixA=matrix_a,
      matrixB=matrix_b,
      idesc=idesc,
      enableInputD=enable_input_d,
      scaleA=scale_a,
      scaleB=scale_b,
      blockScale=block_scale,
      collectorOp=collector_op,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05MMAOp(_ods_ir.OpView):
  r"""
  The `tcgen05.mma` operation is an asynchronous tensor core instruction that
  performs matrix multiplication, accumulation in a single fused operation. It
  targets 5th-generation tensor cores, providing developers with fine-grained
  control over execution and scheduling.

  ```
  D = A * B + (D * 2^ -scaleInputD)    // if `scaleInputD` is provided
  D = A * B                            // if `enableInputD` is false
  D = A * B + D                        // otherwise
  ```

  where:
  - A is an `M x K` matrix in tensor memory or described using shared memory descriptor
  - B is a `K x N` matrix described using shared memory descriptor
  - D is an `M x N` accumulator matrix in tensor memory

  The `shared memory descriptor` can be generated using `tcgen05.mma_smem_desc` Op

  - idesc is a 32-bit value representing the [Instruction Descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instruction-descriptor)

  Optional Operands:
  - `scaleInputD` is an Immediate value operand used for scaling D matrix by 2 ^ (-scaleInputD). The valid range is [0, 15]

  - `disableOutputLane` is a vector mask for selective output
    * vector<4 x i32> when ctaGroup is CTA_1
    * vector<8 x i32> when ctaGroup is CTA_2

  Required Attributes:
  - `kind` is a Tcgen05MMAKind attribute

  - `ctaGroup` specifies CTA group configuration
    * cta_1: MMA will be performed on the current thread's CTA
    * cta_2: MMA will be performed on the current thread and its peer CTA

  Default Attributes:
  - collectorOp is a Tcgen05MMACollectorOp attribute with matrix A as the collector buffer

  - `aShift` shifts the rows of the A matrix down by one row and can only be
     applied if A is in tensor memory

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma)
  """

  OPERATION_NAME = "nvvm.tcgen05.mma"

  _ODS_OPERAND_SEGMENTS = [1,1,1,1,1,0,0,]

  _ODS_REGIONS = (0, True)

  def __init__(self, kind, ctaGroup, matrixD, matrixA, matrixB, idesc, enableInputD, *, collectorOp=None, aShift=None, scaleInputD=None, disableOutputLane=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.mma` operation.

    Args:
      kind: Value for the `kind` attribute (always set).
      ctaGroup: Value for the `ctaGroup` attribute (always set).
      matrixD, matrixA, matrixB, idesc, enableInputD: Values placed in
        operand segments 0 through 4, in that order.
      collectorOp: Optional value for the `collectorOp` attribute.
      aShift: When truthy, a unit attribute is stored under `aShift`.
      scaleInputD: Optional value for operand segment 5.
      disableOutputLane: Optional value for operand segment 6.
      loc: Optional location; also used to resolve the context.
      ip: Optional insertion point.

    Attribute values that are not already `Attribute` instances are converted
    with the matching registered attribute builder when one exists; otherwise
    they are stored as given.
    """
    # One entry per slot of `_ODS_OPERAND_SEGMENTS`. The two optional operands
    # are passed through even when None -- presumably the base class treats a
    # None entry as an absent optional segment (NOTE(review): confirm).
    operands = [
        matrixD,
        matrixA,
        matrixB,
        idesc,
        enableInputD,
        scaleInputD,
        disableOutputLane,
    ]

    # Resolve the context once and reuse it for every attribute built below.
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}

    if (not isinstance(kind, _ods_ir.Attribute)
        and _ods_ir.AttrBuilder.contains('Tcgen05MMAKindAttr')):
      kind = _ods_ir.AttrBuilder.get('Tcgen05MMAKindAttr')(kind, context=_ods_context)
    attributes["kind"] = kind

    if (not isinstance(ctaGroup, _ods_ir.Attribute)
        and _ods_ir.AttrBuilder.contains('CTAGroupKindAttr')):
      ctaGroup = _ods_ir.AttrBuilder.get('CTAGroupKindAttr')(ctaGroup, context=_ods_context)
    attributes["ctaGroup"] = ctaGroup

    if collectorOp is not None:
      if (not isinstance(collectorOp, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('Tcgen05MMACollectorOpAttr')):
        collectorOp = _ods_ir.AttrBuilder.get('Tcgen05MMACollectorOpAttr')(collectorOp, context=_ods_context)
      attributes["collectorOp"] = collectorOp

    if bool(aShift):
      attributes["aShift"] = _ods_ir.UnitAttr.get(_ods_context)

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def matrixD(self) -> _ods_ir.Value:
    """Return the first operand of segment 0 (`matrixD`)."""
    segment = _ods_segmented_accessor(
        self.operation.operands,
        self.operation.attributes["operandSegmentSizes"], 0)
    return segment[0]

  @builtins.property
  def matrixA(self) -> _ods_ir.Value:
    """Return the first operand of segment 1 (`matrixA`)."""
    segment = _ods_segmented_accessor(
        self.operation.operands,
        self.operation.attributes["operandSegmentSizes"], 1)
    return segment[0]

  @builtins.property
  def matrixB(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the first operand of segment 2 (`matrixB`)."""
    segment = _ods_segmented_accessor(
        self.operation.operands,
        self.operation.attributes["operandSegmentSizes"], 2)
    return segment[0]

  @builtins.property
  def idesc(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the first operand of segment 3 (`idesc`)."""
    segment = _ods_segmented_accessor(
        self.operation.operands,
        self.operation.attributes["operandSegmentSizes"], 3)
    return segment[0]

  @builtins.property
  def enableInputD(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the first operand of segment 4 (`enableInputD`)."""
    segment = _ods_segmented_accessor(
        self.operation.operands,
        self.operation.attributes["operandSegmentSizes"], 4)
    return segment[0]

  @builtins.property
  def scaleInputD(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Return the operand of segment 5 (`scaleInputD`), or None when the segment is empty."""
    segment = _ods_segmented_accessor(
        self.operation.operands,
        self.operation.attributes["operandSegmentSizes"], 5)
    return segment[0] if len(segment) > 0 else None

  @builtins.property
  def disableOutputLane(self) -> _Optional[_ods_ir.Value[_ods_ir.VectorType]]:
    """Return the operand of segment 6 (`disableOutputLane`), or None when the segment is empty."""
    segment = _ods_segmented_accessor(
        self.operation.operands,
        self.operation.attributes["operandSegmentSizes"], 6)
    return segment[0] if len(segment) > 0 else None

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `kind` key."""
    return self.operation.attributes["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Store `value` under `kind`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["kind"] = value

  @builtins.property
  def ctaGroup(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `ctaGroup` key."""
    return self.operation.attributes["ctaGroup"]

  @ctaGroup.setter
  def ctaGroup(self, value: _ods_ir.Attribute):
    """Store `value` under `ctaGroup`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["ctaGroup"] = value

  @builtins.property
  def collectorOp(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `collectorOp` key."""
    return self.operation.attributes["collectorOp"]

  @collectorOp.setter
  def collectorOp(self, value: _ods_ir.Attribute):
    """Store `value` under `collectorOp`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    self.operation.attributes["collectorOp"] = value

  @builtins.property
  def aShift(self) -> bool:
    """Return True when an `aShift` attribute is present on the operation."""
    return "aShift" in self.operation.attributes

  @aShift.setter
  def aShift(self, value):
    """Add the `aShift` unit attribute when `value` is truthy, else remove it."""
    attrs = self.operation.attributes
    if bool(value):
      # Build the attribute in the operation's own context instead of the
      # ambient thread-local one, so the setter also works when no context
      # manager is active (or a different context is current).
      attrs["aShift"] = _ods_ir.UnitAttr.get(self.operation.context)
    elif "aShift" in attrs:
      del attrs["aShift"]

  @aShift.deleter
  def aShift(self):
    """Delete the `aShift` attribute entry."""
    del self.operation.attributes["aShift"]

def tcgen05_mma(kind, cta_group, matrix_d, matrix_a, matrix_b, idesc, enable_input_d, *, collector_op=None, a_shift=None, scale_input_d=None, disable_output_lane=None, loc=None, ip=None) -> Tcgen05MMAOp:
  """Construct a `Tcgen05MMAOp` from snake_case arguments and return the op view."""
  op = Tcgen05MMAOp(
      kind=kind,
      ctaGroup=cta_group,
      matrixD=matrix_d,
      matrixA=matrix_a,
      matrixB=matrix_b,
      idesc=idesc,
      enableInputD=enable_input_d,
      collectorOp=collector_op,
      aShift=a_shift,
      scaleInputD=scale_input_d,
      disableOutputLane=disable_output_lane,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05MMASparseBlockScaleOp(_ods_ir.OpView):
  r"""
  The `tcgen05.mma.sp.block_scale` operation is an asynchronous tensor core
  instruction that performs matrix multiplication, accumulation with block
  scaling, and sparse `A` matrix in a single fused operation. It targets
  5th-generation tensor cores, providing developers with fine-grained control
  over execution and scheduling.

  ```
  D = (A * scale_a)  * (B * scale_b)      // if `enableInputD` is specified
  D = (A * scale_a)  * (B * scale_b) + D  // otherwise
  ```

  where:
  - A is an M x (K / 2) matrix in tensor memory or described using shared memory descriptor
  - B is a K x N matrix described using shared memory descriptor
  - D is an M x N accumulator matrix in tensor memory
  - `scale_a` and `scale_b` are matrices in tensor memory used to scale `A` and `B` respectively

  Other attributes and operands are similar to that of tcgen05.mma.block_scale Op

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma-sp)
  """

  OPERATION_NAME = "nvvm.tcgen05.mma.sp.block_scale"

  _ODS_REGIONS = (0, True)

  def __init__(self, kind, ctaGroup, matrixD, matrixA, matrixB, idesc, enableInputD, sparseMetadata, scaleA, scaleB, *, blockScale=None, collectorOp=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.mma.sp.block_scale` operation.

    Args:
      kind: Value for the `kind` attribute (always set).
      ctaGroup: Value for the `ctaGroup` attribute (always set).
      matrixD, matrixA, matrixB, idesc, enableInputD, sparseMetadata, scaleA,
        scaleB: Values used as operands 0 through 7, in that order.
      blockScale: Optional value for the `blockScale` attribute.
      collectorOp: Optional value for the `collectorOp` attribute.
      loc: Optional location; also used to resolve the context.
      ip: Optional insertion point.

    Attribute values that are not already `Attribute` instances are converted
    with the matching registered attribute builder when one exists; otherwise
    they are stored as given.
    """
    operands = [
        matrixD,
        matrixA,
        matrixB,
        idesc,
        enableInputD,
        sparseMetadata,
        scaleA,
        scaleB,
    ]
    _ods_context = _ods_get_default_loc_context(loc)
    attributes = {}

    # `kind` and `ctaGroup` are stored unconditionally.
    if (not isinstance(kind, _ods_ir.Attribute)
        and _ods_ir.AttrBuilder.contains('MMABlockScaleKindAttr')):
      kind = _ods_ir.AttrBuilder.get('MMABlockScaleKindAttr')(kind, context=_ods_context)
    attributes["kind"] = kind

    if (not isinstance(ctaGroup, _ods_ir.Attribute)
        and _ods_ir.AttrBuilder.contains('CTAGroupKindAttr')):
      ctaGroup = _ods_ir.AttrBuilder.get('CTAGroupKindAttr')(ctaGroup, context=_ods_context)
    attributes["ctaGroup"] = ctaGroup

    # The remaining attributes are only stored when a value was supplied.
    if blockScale is not None:
      if (not isinstance(blockScale, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('Tcgen05MMABlockScaleAttr')):
        blockScale = _ods_ir.AttrBuilder.get('Tcgen05MMABlockScaleAttr')(blockScale, context=_ods_context)
      attributes["blockScale"] = blockScale

    if collectorOp is not None:
      if (not isinstance(collectorOp, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('Tcgen05MMACollectorOpAttr')):
        collectorOp = _ods_ir.AttrBuilder.get('Tcgen05MMACollectorOpAttr')(collectorOp, context=_ods_context)
      attributes["collectorOp"] = collectorOp

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def matrixD(self) -> _ods_ir.Value:
    """Return the operand at index 0 (`matrixD`)."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def matrixA(self) -> _ods_ir.Value:
    """Return the operand at index 1 (`matrixA`)."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def matrixB(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the operand at index 2 (`matrixB`)."""
    op_operands = self.operation.operands
    return op_operands[2]

  @builtins.property
  def idesc(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the operand at index 3 (`idesc`)."""
    op_operands = self.operation.operands
    return op_operands[3]

  @builtins.property
  def enableInputD(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Return the operand at index 4 (`enableInputD`)."""
    op_operands = self.operation.operands
    return op_operands[4]

  @builtins.property
  def sparseMetadata(self) -> _ods_ir.Value:
    """Return the operand at index 5 (`sparseMetadata`)."""
    op_operands = self.operation.operands
    return op_operands[5]

  @builtins.property
  def scaleA(self) -> _ods_ir.Value:
    """Return the operand at index 6 (`scaleA`)."""
    op_operands = self.operation.operands
    return op_operands[6]

  @builtins.property
  def scaleB(self) -> _ods_ir.Value:
    """Return the operand at index 7 (`scaleB`)."""
    op_operands = self.operation.operands
    return op_operands[7]

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `kind` key."""
    attrs = self.operation.attributes
    return attrs["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Store `value` under `kind`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["kind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def ctaGroup(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `ctaGroup` key."""
    attrs = self.operation.attributes
    return attrs["ctaGroup"]

  @ctaGroup.setter
  def ctaGroup(self, value: _ods_ir.Attribute):
    """Store `value` under `ctaGroup`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["ctaGroup"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def blockScale(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `blockScale` key."""
    attrs = self.operation.attributes
    return attrs["blockScale"]

  @blockScale.setter
  def blockScale(self, value: _ods_ir.Attribute):
    """Store `value` under `blockScale`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["blockScale"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def collectorOp(self) -> _ods_ir.Attribute:
    """Return the attribute stored under the `collectorOp` key."""
    attrs = self.operation.attributes
    return attrs["collectorOp"]

  @collectorOp.setter
  def collectorOp(self, value: _ods_ir.Attribute):
    """Store `value` under `collectorOp`; raises ValueError when `value` is None."""
    if value is not None:
      self.operation.attributes["collectorOp"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def tcgen05_mma_sp_block_scale(kind, cta_group, matrix_d, matrix_a, matrix_b, idesc, enable_input_d, sparse_metadata, scale_a, scale_b, *, block_scale=None, collector_op=None, loc=None, ip=None) -> Tcgen05MMASparseBlockScaleOp:
  """Construct a `Tcgen05MMASparseBlockScaleOp` from snake_case arguments and return the op view."""
  op = Tcgen05MMASparseBlockScaleOp(
      kind=kind,
      ctaGroup=cta_group,
      matrixD=matrix_d,
      matrixA=matrix_a,
      matrixB=matrix_b,
      idesc=idesc,
      enableInputD=enable_input_d,
      sparseMetadata=sparse_metadata,
      scaleA=scale_a,
      scaleB=scale_b,
      blockScale=block_scale,
      collectorOp=collector_op,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05MMASparseOp(_ods_ir.OpView):
  r"""
  The `tcgen05.mma.sp` operation is an asynchronous tensor core instruction
  that performs matrix multiplication and accumulation with a sparse `A` matrix
  in a single fused operation. It targets 5th-generation tensor cores, providing
  developers with fine-grained control over execution and scheduling.

  ```
  D = A * B + (D * 2^ -scaleInputD)    // if `scaleInputD` is provided
  D = A * B                            // if `enableInputD` is false
  D = A * B + D                        // otherwise
  ```

  where:
  - A is an `M x (K / 2)` matrix in tensor memory or described using shared memory descriptor
  - B is a `K x N` matrix described using shared memory descriptor
  - D is an `M x N` accumulator matrix in tensor memory
  - sparseMetadata located in tensor memory specifies the mapping of the `K / 2`
  non-zero elements to the K elements before performing the MMA operation

  Other attributes and operands are similar to those of the tcgen05.mma Op.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma-sp)
  """

  OPERATION_NAME = "nvvm.tcgen05.mma.sp"

  # One entry per operand group, in the order the builder appends them.
  _ODS_OPERAND_SEGMENTS = [1,1,1,1,1,1,0,0,]

  _ODS_REGIONS = (0, True)

  def __init__(self, kind, ctaGroup, matrixD, matrixA, matrixB, idesc, enableInputD, sparseMetadata, *, collectorOp=None, aShift=None, scaleInputD=None, disableOutputLane=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.mma.sp` operation.

    `kind` and `ctaGroup` are always stored as attributes; `collectorOp` is
    stored only when it is not None and `aShift` only when it is truthy.
    Attribute arguments that are not already `Attribute` instances are
    converted through the registered `AttrBuilder` when one exists, and are
    stored unchanged otherwise.
    """
    # All eight operand slots are filled unconditionally; the two trailing
    # optional operands keep whatever the caller passed, including None.
    operands = [
        matrixD,
        matrixA,
        matrixB,
        idesc,
        enableInputD,
        sparseMetadata,
        scaleInputD,
        disableOutputLane,
    ]
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)

    kind_attr = kind
    if not isinstance(kind, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('Tcgen05MMAKindAttr'):
      kind_attr = _ods_ir.AttrBuilder.get('Tcgen05MMAKindAttr')(kind, context=_ods_context)
    attributes["kind"] = kind_attr

    cta_group_attr = ctaGroup
    if not isinstance(ctaGroup, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('CTAGroupKindAttr'):
      cta_group_attr = _ods_ir.AttrBuilder.get('CTAGroupKindAttr')(ctaGroup, context=_ods_context)
    attributes["ctaGroup"] = cta_group_attr

    if collectorOp is not None:
      collector_attr = collectorOp
      if not isinstance(collectorOp, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('Tcgen05MMACollectorOpAttr'):
        collector_attr = _ods_ir.AttrBuilder.get('Tcgen05MMACollectorOpAttr')(collectorOp, context=_ods_context)
      attributes["collectorOp"] = collector_attr

    if aShift:
      # Unit attribute: its presence alone carries the flag.
      attributes["aShift"] = _ods_ir.UnitAttr.get(_ods_get_default_loc_context(loc))

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def matrixD(self) -> _ods_ir.Value:
    """First value of operand segment 0."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 0)
    return segment[0]

  @builtins.property
  def matrixA(self) -> _ods_ir.Value:
    """First value of operand segment 1."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 1)
    return segment[0]

  @builtins.property
  def matrixB(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First value of operand segment 2."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 2)
    return segment[0]

  @builtins.property
  def idesc(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First value of operand segment 3."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 3)
    return segment[0]

  @builtins.property
  def enableInputD(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """First value of operand segment 4."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 4)
    return segment[0]

  @builtins.property
  def sparseMetadata(self) -> _ods_ir.Value:
    """First value of operand segment 5."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 5)
    return segment[0]

  @builtins.property
  def scaleInputD(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """First value of operand segment 6, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 6)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def disableOutputLane(self) -> _Optional[_ods_ir.Value[_ods_ir.VectorType]]:
    """First value of operand segment 7, or None when that segment is empty."""
    op = self.operation
    segment = _ods_segmented_accessor(
        op.operands, op.attributes["operandSegmentSizes"], 7)
    if len(segment) > 0:
      return segment[0]
    return None

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Read the `kind` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Store `value` as the `kind` attribute; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["kind"] = value

  @builtins.property
  def ctaGroup(self) -> _ods_ir.Attribute:
    """Read the `ctaGroup` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["ctaGroup"]

  @ctaGroup.setter
  def ctaGroup(self, value: _ods_ir.Attribute):
    """Store `value` as the `ctaGroup` attribute; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["ctaGroup"] = value

  @builtins.property
  def collectorOp(self) -> _ods_ir.Attribute:
    """Read the `collectorOp` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["collectorOp"]

  @collectorOp.setter
  def collectorOp(self, value: _ods_ir.Attribute):
    """Store `value` as the `collectorOp` attribute; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["collectorOp"] = value

  @builtins.property
  def aShift(self) -> bool:
    """True when the `aShift` unit attribute is present on the operation."""
    attrs = self.operation.attributes
    return "aShift" in attrs

  @aShift.setter
  def aShift(self, value):
    """Add the `aShift` unit attribute for a truthy `value`, else drop it."""
    if value:
      self.operation.attributes["aShift"] = _ods_ir.UnitAttr.get()
    elif "aShift" in self.operation.attributes:
      del self.operation.attributes["aShift"]

  @aShift.deleter
  def aShift(self):
    """Delete the `aShift` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["aShift"]

def tcgen05_mma_sp(kind, cta_group, matrix_d, matrix_a, matrix_b, idesc, enable_input_d, sparse_metadata, *, collector_op=None, a_shift=None, scale_input_d=None, disable_output_lane=None, loc=None, ip=None) -> Tcgen05MMASparseOp:
  """Construct a `Tcgen05MMASparseOp` and return the op view.

  Every snake_case argument is forwarded to the matching camelCase keyword of
  the class constructor; `loc` and `ip` are passed through unchanged.
  """
  op = Tcgen05MMASparseOp(
      kind=kind,
      ctaGroup=cta_group,
      matrixD=matrix_d,
      matrixA=matrix_a,
      matrixB=matrix_b,
      idesc=idesc,
      enableInputD=enable_input_d,
      sparseMetadata=sparse_metadata,
      collectorOp=collector_op,
      aShift=a_shift,
      scaleInputD=scale_input_d,
      disableOutputLane=disable_output_lane,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05MMAWsOp(_ods_ir.OpView):
  r"""
  The `tcgen05.mma.ws` operation is an asynchronous tensor core instruction
  that performs weight stationary convolution matrix multiplication and
  accumulation in a single fused operation. It targets 5th-generation tensor
  cores, providing developers with fine-grained control over execution and
  scheduling.

  ```
  D = A * B       // if `enableInputD` is false
  D = A * B + D   // otherwise
  ```

  where:
  - A is an `M x K` matrix in tensor memory or described using shared memory descriptor
  - B is a `K x N` matrix described using shared memory descriptor
  - D is an `M x N` accumulator matrix in tensor memory

  The `shared memory descriptor` can be generated using `tcgen05.mma_smem_desc` Op

  - idesc is a 32-bit value representing the [Instruction Descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instruction-descriptor)

  Optional Operands:
  - zeroColMask is a 64 bit value representing the [Zero-column mask descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-zero-column-mask-descriptor)

  Required Attributes:
  - `kind` is a Tcgen05MMAKind attribute

  Default Valued Attributes:
  - collectorBBuffer specifies collector buffer for matrix B: b0 (default), b1, b2, b3

  - collectorOp is a Tcgen05MMACollectorOp attribute with matrix B as the collector buffer

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma-ws)
  """

  OPERATION_NAME = "nvvm.tcgen05.mma.ws"

  _ODS_REGIONS = (0, True)

  def __init__(self, kind, matrixD, matrixA, matrixB, idesc, enableInputD, *, collectorBBuffer=None, collectorOp=None, zeroColMask=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.mma.ws` operation.

    The five positional values become operands 0-4; `zeroColMask` is appended
    as a sixth operand only when it is not None. `kind` is always stored as an
    attribute, while `collectorBBuffer` and `collectorOp` are stored only when
    not None. Attribute arguments that are not already `Attribute` instances
    are converted through the registered `AttrBuilder` when one exists, and
    are stored unchanged otherwise.
    """
    operands = [matrixD, matrixA, matrixB, idesc, enableInputD]
    if zeroColMask is not None:
      operands.append(zeroColMask)
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)

    kind_attr = kind
    if not isinstance(kind, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('Tcgen05MMAKindAttr'):
      kind_attr = _ods_ir.AttrBuilder.get('Tcgen05MMAKindAttr')(kind, context=_ods_context)
    attributes["kind"] = kind_attr

    if collectorBBuffer is not None:
      buffer_attr = collectorBBuffer
      if not isinstance(collectorBBuffer, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('Tcgen05MMACollectorBBufferAttr'):
        buffer_attr = _ods_ir.AttrBuilder.get('Tcgen05MMACollectorBBufferAttr')(collectorBBuffer, context=_ods_context)
      attributes["collectorBBuffer"] = buffer_attr

    if collectorOp is not None:
      collector_attr = collectorOp
      if not isinstance(collectorOp, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('Tcgen05MMACollectorOpAttr'):
        collector_attr = _ods_ir.AttrBuilder.get('Tcgen05MMACollectorOpAttr')(collectorOp, context=_ods_context)
      attributes["collectorOp"] = collector_attr

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def matrixD(self) -> _ods_ir.Value:
    """Operand at index 0."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def matrixA(self) -> _ods_ir.Value:
    """Operand at index 1."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def matrixB(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 2."""
    operand_list = self.operation.operands
    return operand_list[2]

  @builtins.property
  def idesc(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 3."""
    operand_list = self.operation.operands
    return operand_list[3]

  @builtins.property
  def enableInputD(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 4."""
    operand_list = self.operation.operands
    return operand_list[4]

  @builtins.property
  def zeroColMask(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Operand at index 5, or None when the op has fewer than six operands."""
    operand_list = self.operation.operands
    if len(operand_list) < 6:
      return None
    return operand_list[5]

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Read the `kind` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Store `value` as the `kind` attribute; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["kind"] = value

  @builtins.property
  def collectorBBuffer(self) -> _ods_ir.Attribute:
    """Read the `collectorBBuffer` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["collectorBBuffer"]

  @collectorBBuffer.setter
  def collectorBBuffer(self, value: _ods_ir.Attribute):
    """Store `value` as `collectorBBuffer`; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["collectorBBuffer"] = value

  @builtins.property
  def collectorOp(self) -> _ods_ir.Attribute:
    """Read the `collectorOp` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["collectorOp"]

  @collectorOp.setter
  def collectorOp(self, value: _ods_ir.Attribute):
    """Store `value` as the `collectorOp` attribute; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["collectorOp"] = value

def tcgen05_mma_ws(kind, matrix_d, matrix_a, matrix_b, idesc, enable_input_d, *, collector_b_buffer=None, collector_op=None, zero_col_mask=None, loc=None, ip=None) -> Tcgen05MMAWsOp:
  """Construct a `Tcgen05MMAWsOp` and return the op view.

  Every snake_case argument is forwarded to the matching camelCase keyword of
  the class constructor; `loc` and `ip` are passed through unchanged.
  """
  op = Tcgen05MMAWsOp(
      kind=kind,
      matrixD=matrix_d,
      matrixA=matrix_a,
      matrixB=matrix_b,
      idesc=idesc,
      enableInputD=enable_input_d,
      collectorBBuffer=collector_b_buffer,
      collectorOp=collector_op,
      zeroColMask=zero_col_mask,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05MMAWsSparseOp(_ods_ir.OpView):
  r"""
  The `tcgen05.mma.ws.sp` operation is an asynchronous tensor core instruction
  that performs weight stationary convolution matrix multiplication and
  accumulation with a sparse `A` matrix in a single fused operation. It targets
  5th-generation tensor cores, providing developers with fine-grained control
  over execution and scheduling.

  ```
  D = A * B       // if `enableInputD` is false
  D = A * B + D   // otherwise
  ```

  where:
  - A is an M x (K / 2) matrix in memory or descriptor format
  - B is a K x N matrix
  - D is an M x N accumulator matrix
  - sparseMetadata located in tensor memory specifies the mapping of the `K / 2`
  non-zero elements to the K elements before performing the MMA operation

  Other attributes and operands are similar to those of the tcgen05.mma.ws Op.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma-ws-sp)
  """

  OPERATION_NAME = "nvvm.tcgen05.mma.ws.sp"

  _ODS_REGIONS = (0, True)

  def __init__(self, kind, matrixD, matrixA, matrixB, idesc, enableInputD, sparseMetadata, *, collectorBBuffer=None, collectorOp=None, zeroColMask=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.mma.ws.sp` operation.

    The six positional values become operands 0-5; `zeroColMask` is appended
    as a seventh operand only when it is not None. `kind` is always stored as
    an attribute, while `collectorBBuffer` and `collectorOp` are stored only
    when not None. Attribute arguments that are not already `Attribute`
    instances are converted through the registered `AttrBuilder` when one
    exists, and are stored unchanged otherwise.
    """
    operands = [matrixD, matrixA, matrixB, idesc, enableInputD, sparseMetadata]
    if zeroColMask is not None:
      operands.append(zeroColMask)
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)

    kind_attr = kind
    if not isinstance(kind, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('Tcgen05MMAKindAttr'):
      kind_attr = _ods_ir.AttrBuilder.get('Tcgen05MMAKindAttr')(kind, context=_ods_context)
    attributes["kind"] = kind_attr

    if collectorBBuffer is not None:
      buffer_attr = collectorBBuffer
      if not isinstance(collectorBBuffer, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('Tcgen05MMACollectorBBufferAttr'):
        buffer_attr = _ods_ir.AttrBuilder.get('Tcgen05MMACollectorBBufferAttr')(collectorBBuffer, context=_ods_context)
      attributes["collectorBBuffer"] = buffer_attr

    if collectorOp is not None:
      collector_attr = collectorOp
      if not isinstance(collectorOp, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('Tcgen05MMACollectorOpAttr'):
        collector_attr = _ods_ir.AttrBuilder.get('Tcgen05MMACollectorOpAttr')(collectorOp, context=_ods_context)
      attributes["collectorOp"] = collector_attr

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def matrixD(self) -> _ods_ir.Value:
    """Operand at index 0."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def matrixA(self) -> _ods_ir.Value:
    """Operand at index 1."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def matrixB(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 2."""
    operand_list = self.operation.operands
    return operand_list[2]

  @builtins.property
  def idesc(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 3."""
    operand_list = self.operation.operands
    return operand_list[3]

  @builtins.property
  def enableInputD(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 4."""
    operand_list = self.operation.operands
    return operand_list[4]

  @builtins.property
  def sparseMetadata(self) -> _ods_ir.Value:
    """Operand at index 5."""
    operand_list = self.operation.operands
    return operand_list[5]

  @builtins.property
  def zeroColMask(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Operand at index 6, or None when the op has fewer than seven operands."""
    operand_list = self.operation.operands
    if len(operand_list) < 7:
      return None
    return operand_list[6]

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Read the `kind` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Store `value` as the `kind` attribute; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["kind"] = value

  @builtins.property
  def collectorBBuffer(self) -> _ods_ir.Attribute:
    """Read the `collectorBBuffer` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["collectorBBuffer"]

  @collectorBBuffer.setter
  def collectorBBuffer(self, value: _ods_ir.Attribute):
    """Store `value` as `collectorBBuffer`; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["collectorBBuffer"] = value

  @builtins.property
  def collectorOp(self) -> _ods_ir.Attribute:
    """Read the `collectorOp` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["collectorOp"]

  @collectorOp.setter
  def collectorOp(self, value: _ods_ir.Attribute):
    """Store `value` as the `collectorOp` attribute; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["collectorOp"] = value

def tcgen05_mma_ws_sp(kind, matrix_d, matrix_a, matrix_b, idesc, enable_input_d, sparse_metadata, *, collector_b_buffer=None, collector_op=None, zero_col_mask=None, loc=None, ip=None) -> Tcgen05MMAWsSparseOp:
  """Construct a `Tcgen05MMAWsSparseOp` and return the op view.

  Every snake_case argument is forwarded to the matching camelCase keyword of
  the class constructor; `loc` and `ip` are passed through unchanged.
  """
  op = Tcgen05MMAWsSparseOp(
      kind=kind,
      matrixD=matrix_d,
      matrixA=matrix_a,
      matrixB=matrix_b,
      idesc=idesc,
      enableInputD=enable_input_d,
      sparseMetadata=sparse_metadata,
      collectorBBuffer=collector_b_buffer,
      collectorOp=collector_op,
      zeroColMask=zero_col_mask,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05MmaSmemDescOp(_ods_ir.OpView):
  r"""
  The `nvvm.tcgen05.mma_smem_desc` Op constructs a Shared Memory descriptor
  for tcgen05.mma. This descriptor is a 64-bit value which describes the
  properties of the multiplicand matrix in shared memory, including its
  location in the shared memory of the current CTA.

  ```
  +-----------+------+------------------------------------------------------+
  | Bit-field | Size | Description                                          |
  +-----------+------+------------------------------------------------------+
  | 0-13      | 14   | Matrix start address                                 |
  | 14-15     | 2    | Reserved                                             |
  | 16-29     | 14   | Leading dim relative-offset (or) absolute-address    |
  | 30-31     | 2    | Reserved                                             |
  | 32-45     | 14   | Stride dimension byte offset                         |
  | 46-48     | 3    | Fixed constant value of 0b001                        |
  | 49-51     | 3    | Matrix base offset                                   |
  | 52        | 1    | Leading dimension stride mode:                       |
  |           |      |   0: byte offset relative                            |
  |           |      |   1: byte address absolute                           |
  | 53-60     | 8    | Fixed constant value of 0b00000000                   |
  | 61-63     | 3    | Swizzling mode:                                      |
  |           |      |   0: No swizzling                                    |
  |           |      |   1: 128-Byte with 32B atomic swizzling              |
  |           |      |   2: 128-Byte swizzling                              |
  |           |      |   4: 64-Byte swizzling                               |
  |           |      |   6: 32-Byte swizzling                               |
  |           |      |   (Values 3, 5 and 7 are invalid)                    |
  +-----------+------+------------------------------------------------------+
  ```

  Example:
  ```mlir
    %desc = nvvm.tcgen05.mma_smem_desc (%startAddr, %leadingDimOffset, %strideDimOffset,
                                        %baseOffset, %leadingDimMode, %swizzleMode) : (i32, i32, i32, i8, i1, i8) -> i64
  ```
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-shared-memory-descriptor)
  """

  OPERATION_NAME = "nvvm.tcgen05.mma_smem_desc"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, startAddr, leadingDimOffset, strideDimOffset, baseOffset, leadingDimMode, swizzleMode, *, loc=None, ip=None):
    """Build an `nvvm.tcgen05.mma_smem_desc` operation.

    `res` is passed as the single entry of the results list and the six
    remaining positional values become operands 0-5, in signature order. No
    attributes are set.
    """
    operands = [
        startAddr,
        leadingDimOffset,
        strideDimOffset,
        baseOffset,
        leadingDimMode,
        swizzleMode,
    ]
    # The value is not used below (there are no attributes to build); the
    # lookup is kept so the builder behaves exactly as generated.
    _ods_context = _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[res],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def startAddr(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 0."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def leadingDimOffset(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 1."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def strideDimOffset(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 2."""
    operand_list = self.operation.operands
    return operand_list[2]

  @builtins.property
  def baseOffset(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 3."""
    operand_list = self.operation.operands
    return operand_list[3]

  @builtins.property
  def leadingDimMode(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 4."""
    operand_list = self.operation.operands
    return operand_list[4]

  @builtins.property
  def swizzleMode(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand at index 5."""
    operand_list = self.operation.operands
    return operand_list[5]

  @builtins.property
  def res(self) -> _ods_ir.OpResult[_ods_ir.IntegerType]:
    """Result at index 0."""
    result_list = self.operation.results
    return result_list[0]

def tcgen05_mma_smem_desc(res, start_addr, leading_dim_offset, stride_dim_offset, base_offset, leading_dim_mode, swizzle_mode, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `Tcgen05MmaSmemDescOp` and return its `.result`.

  Every snake_case argument is forwarded to the matching camelCase keyword of
  the class constructor; `loc` and `ip` are passed through unchanged.
  """
  op = Tcgen05MmaSmemDescOp(
      res=res,
      startAddr=start_addr,
      leadingDimOffset=leading_dim_offset,
      strideDimOffset=stride_dim_offset,
      baseOffset=base_offset,
      leadingDimMode=leading_dim_mode,
      swizzleMode=swizzle_mode,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class Tcgen05RelinquishAllocPermitOp(_ods_ir.OpView):
  r"""
  The `tcgen05.relinquish_alloc_permit` Op specifies that the CTA
  of the executing thread is relinquishing the right to allocate
  Tensor Memory. So, it is illegal for a CTA to perform `tcgen05.alloc`
  after any of its constituent threads execute `tcgen05.relinquish_alloc_permit`.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
  """

  OPERATION_NAME = "nvvm.tcgen05.relinquish_alloc_permit"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, group=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.relinquish_alloc_permit` operation.

    The op takes no operands. `group` is stored as an attribute only when it
    is not None; a value that is not already an `Attribute` is converted
    through the registered `AttrBuilder` when one exists, and is stored
    unchanged otherwise.
    """
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)

    if group is not None:
      group_attr = group
      if not isinstance(group, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('CTAGroupKindAttr'):
        group_attr = _ods_ir.AttrBuilder.get('CTAGroupKindAttr')(group, context=_ods_context)
      attributes["group"] = group_attr

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def group(self) -> _ods_ir.Attribute:
    """Read the `group` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["group"]

  @group.setter
  def group(self, value: _ods_ir.Attribute):
    """Store `value` as the `group` attribute; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["group"] = value

def tcgen05_relinquish_alloc_permit(*, group=None, loc=None, ip=None) -> Tcgen05RelinquishAllocPermitOp:
  """Construct a `Tcgen05RelinquishAllocPermitOp` and return the op view."""
  op = Tcgen05RelinquishAllocPermitOp(
      group=group,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05ShiftOp(_ods_ir.OpView):
  r"""
  The `tcgen05.shift` is an asynchronous instruction which initiates
  the shifting of 32-byte elements downwards across all the rows,
  except the last, by one row. The operand `taddr` specifies the base
  address of the matrix in Tensor Memory whose rows must be down shifted.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-shift)
  """

  OPERATION_NAME = "nvvm.tcgen05.shift"

  _ODS_REGIONS = (0, True)

  def __init__(self, taddr, *, group=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.shift` operation.

    `taddr` becomes the only operand. `group` is stored as an attribute only
    when it is not None; a value that is not already an `Attribute` is
    converted through the registered `AttrBuilder` when one exists, and is
    stored unchanged otherwise.
    """
    operands = [taddr]
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)

    if group is not None:
      group_attr = group
      if not isinstance(group, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('CTAGroupKindAttr'):
        group_attr = _ods_ir.AttrBuilder.get('CTAGroupKindAttr')(group, context=_ods_context)
      attributes["group"] = group_attr

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def taddr(self) -> _ods_ir.Value:
    """Operand at index 0."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def group(self) -> _ods_ir.Attribute:
    """Read the `group` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["group"]

  @group.setter
  def group(self, value: _ods_ir.Attribute):
    """Store `value` as the `group` attribute; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["group"] = value

def tcgen05_shift(taddr, *, group=None, loc=None, ip=None) -> Tcgen05ShiftOp:
  """Construct a `Tcgen05ShiftOp` and return the op view."""
  op = Tcgen05ShiftOp(
      taddr=taddr,
      group=group,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05StOp(_ods_ir.OpView):
  r"""
  Instruction `tcgen05.st` asynchronously stores data from the source register `r`
  into the Tensor Memory at the location specified by the 32-bit address operand
  `tmemAddr`, collectively across all threads of the warps.

  The `shape` and the `num` attribute together determine the total dimension of
  the data which is stored to the Tensor Memory. The `shape` indicates the base
  dimension of data to be accessed. The `num` attribute indicates the repeat
  factor on the base dimension resulting in the total dimension of the data that
  is accessed.

  The shape `16x32bx2` performs two accesses into Tensor Memory of the shape
  `16x32b`. The base address of the first access is specified by `tmemAddr`
  and the base address of the second access is specified by
  `tmemAddr + offset`, where `offset` is an immediate argument.

  The unit attribute `unpack` can be used to unpack a 32-bit element
  in the register into two 16-bit elements and store them in adjacent columns.

  The following table describes the size of the vector for various combinations
  of `num` and `shape` attributes:
  ```
  |=====================================================================|
  | num/shape      |     16x32bx2/16x64b/32x32b |  16x128b   | 16x256b  |
  |=====================================================================|
  | x1             |          1                 |    2       |    4     |
  | x2             |          2                 |    4       |    8     |
  | x4             |          4                 |    8       |    16    |
  | x8             |          8                 |    16      |    32    |
  | x16            |          16                |    32      |    64    |
  | x32            |          32                |    64      |    128   |
  | x64            |          64                |    128     |    NA    |
  | x128           |          128               |    NA      |    NA    |
  |=====================================================================|
  ```

  Example:
  ```mlir
    nvvm.tcgen05.st %tmemAddr, %val, %offset unpack {
      shape = #nvvm.tcgen05_ldst_shape<shape_16x32bx2>,
    } : <2xi32>
  ```

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-st)
  """

  OPERATION_NAME = "nvvm.tcgen05.st"

  _ODS_REGIONS = (0, True)

  def __init__(self, shape, tmemAddr, val, *, unpack=None, offset=None, loc=None, ip=None):
    """Build an `nvvm.tcgen05.st` operation.

    `tmemAddr` and `val` become operands 0 and 1; `offset` is appended as a
    third operand only when it is not None. The `unpack` unit attribute is
    added when `unpack` is truthy. `shape` is always stored as an attribute;
    a value that is not already an `Attribute` is converted through the
    registered `AttrBuilder` when one exists, and is stored unchanged
    otherwise.
    """
    operands = [tmemAddr, val]
    if offset is not None:
      operands.append(offset)
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)

    if unpack:
      # Unit attribute: its presence alone carries the flag.
      attributes["unpack"] = _ods_ir.UnitAttr.get(_ods_get_default_loc_context(loc))

    shape_attr = shape
    if not isinstance(shape, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('Tcgen05LdStShapeAttr'):
      shape_attr = _ods_ir.AttrBuilder.get('Tcgen05LdStShapeAttr')(shape, context=_ods_context)
    attributes["shape"] = shape_attr

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def tmemAddr(self) -> _ods_ir.Value:
    """Operand at index 0."""
    operand_list = self.operation.operands
    return operand_list[0]

  @builtins.property
  def val(self) -> _ods_ir.Value:
    """Operand at index 1."""
    operand_list = self.operation.operands
    return operand_list[1]

  @builtins.property
  def offset(self) -> _Optional[_ods_ir.Value[_ods_ir.IntegerType]]:
    """Operand at index 2, or None when the op has fewer than three operands."""
    operand_list = self.operation.operands
    if len(operand_list) < 3:
      return None
    return operand_list[2]

  @builtins.property
  def unpack(self) -> bool:
    """True when the `unpack` unit attribute is present on the operation."""
    attrs = self.operation.attributes
    return "unpack" in attrs

  @unpack.setter
  def unpack(self, value):
    """Add the `unpack` unit attribute for a truthy `value`, else drop it."""
    if value:
      self.operation.attributes["unpack"] = _ods_ir.UnitAttr.get()
    elif "unpack" in self.operation.attributes:
      del self.operation.attributes["unpack"]

  @unpack.deleter
  def unpack(self):
    """Delete the `unpack` attribute from the operation."""
    attrs = self.operation.attributes
    del attrs["unpack"]

  @builtins.property
  def shape(self) -> _ods_ir.Attribute:
    """Read the `shape` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["shape"]

  @shape.setter
  def shape(self, value: _ods_ir.Attribute):
    """Store `value` as the `shape` attribute; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["shape"] = value

def tcgen05_st(shape, tmem_addr, val, *, unpack=None, offset=None, loc=None, ip=None) -> Tcgen05StOp:
  """Construct a `Tcgen05StOp` and return the op view.

  `tmem_addr` is forwarded as `tmemAddr`; all other arguments are forwarded
  under their own names.
  """
  op = Tcgen05StOp(
      shape=shape,
      tmemAddr=tmem_addr,
      val=val,
      unpack=unpack,
      offset=offset,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class Tcgen05WaitOp(_ods_ir.OpView):
  r"""
  The `tcgen05.wait<load>` causes the executing thread to block until
  all prior `tcgen05.ld` operations issued by the executing thread
  have completed. Similarly, the `tcgen05.wait<store>` causes the executing
  thread to block until all prior `tcgen05.st` operations issued by the
  executing thread have completed.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-wait)
  """

  OPERATION_NAME = "nvvm.tcgen05.wait"

  _ODS_REGIONS = (0, True)

  def __init__(self, kind, *, loc=None, ip=None):
    """Build an `nvvm.tcgen05.wait` operation.

    The op takes no operands. `kind` is always stored as an attribute; a
    value that is not already an `Attribute` is converted through the
    registered `AttrBuilder` when one exists, and is stored unchanged
    otherwise.
    """
    attributes = {}
    _ods_context = _ods_get_default_loc_context(loc)

    kind_attr = kind
    if not isinstance(kind, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('Tcgen05WaitKindAttr'):
      kind_attr = _ods_ir.AttrBuilder.get('Tcgen05WaitKindAttr')(kind, context=_ods_context)
    attributes["kind"] = kind_attr

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attributes,
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """Read the `kind` attribute of the wrapped operation."""
    attrs = self.operation.attributes
    return attrs["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Store `value` as the `kind` attribute; None raises ValueError."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    attrs = self.operation.attributes
    attrs["kind"] = value

def tcgen05_wait(kind, *, loc=None, ip=None) -> Tcgen05WaitOp:
  """Construct a `Tcgen05WaitOp` and return the op view itself."""
  op = Tcgen05WaitOp(kind=kind, loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class ThreadIdXOp(_ods_ir.OpView):
  """Python view of the `nvvm.read.ptx.sreg.tid.x` operation.

  The op is built with no operands, a single result and an optional
  `range` attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.tid.x"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    Args:
      res: Sole entry of the `results` list handed to `OpView.__init__`.
      range: Optional. An `Attribute` is stored as-is; any other non-None
        value goes through the `LLVM_ConstantRangeAttr` builder when that
        builder is registered, and is stored unchanged otherwise.
      loc: Forwarded to `OpView.__init__`; also used to pick the context.
      ip: Forwarded to `OpView.__init__`.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if range is not None:
      needs_build = (
          not isinstance(range, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if needs_build:
        attrs["range"] = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=ctx)
      else:
        attrs["range"] = range
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or `None` when the op does not carry one."""
    op_attrs = self.operation.attributes
    return op_attrs["range"] if "range" in op_attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set `range`; assigning `None` removes the attribute if present."""
    op_attrs = self.operation.attributes
    if value is None:
      if "range" in op_attrs:
        del op_attrs["range"]
      return
    op_attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    op_attrs = self.operation.attributes
    del op_attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's first (and only declared) result."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_tid_x(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ThreadIdXOp` and return its `.result`."""
  op = ThreadIdXOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ThreadIdYOp(_ods_ir.OpView):
  """Python view of the `nvvm.read.ptx.sreg.tid.y` operation.

  The op is built with no operands, a single result and an optional
  `range` attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.tid.y"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    Args:
      res: Sole entry of the `results` list handed to `OpView.__init__`.
      range: Optional. An `Attribute` is stored as-is; any other non-None
        value goes through the `LLVM_ConstantRangeAttr` builder when that
        builder is registered, and is stored unchanged otherwise.
      loc: Forwarded to `OpView.__init__`; also used to pick the context.
      ip: Forwarded to `OpView.__init__`.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if range is not None:
      needs_build = (
          not isinstance(range, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if needs_build:
        attrs["range"] = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=ctx)
      else:
        attrs["range"] = range
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or `None` when the op does not carry one."""
    op_attrs = self.operation.attributes
    return op_attrs["range"] if "range" in op_attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set `range`; assigning `None` removes the attribute if present."""
    op_attrs = self.operation.attributes
    if value is None:
      if "range" in op_attrs:
        del op_attrs["range"]
      return
    op_attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    op_attrs = self.operation.attributes
    del op_attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's first (and only declared) result."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_tid_y(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ThreadIdYOp` and return its `.result`."""
  op = ThreadIdYOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class ThreadIdZOp(_ods_ir.OpView):
  """Python view of the `nvvm.read.ptx.sreg.tid.z` operation.

  The op is built with no operands, a single result and an optional
  `range` attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.tid.z"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    Args:
      res: Sole entry of the `results` list handed to `OpView.__init__`.
      range: Optional. An `Attribute` is stored as-is; any other non-None
        value goes through the `LLVM_ConstantRangeAttr` builder when that
        builder is registered, and is stored unchanged otherwise.
      loc: Forwarded to `OpView.__init__`; also used to pick the context.
      ip: Forwarded to `OpView.__init__`.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if range is not None:
      needs_build = (
          not isinstance(range, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if needs_build:
        attrs["range"] = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=ctx)
      else:
        attrs["range"] = range
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or `None` when the op does not carry one."""
    op_attrs = self.operation.attributes
    return op_attrs["range"] if "range" in op_attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set `range`; assigning `None` removes the attribute if present."""
    op_attrs = self.operation.attributes
    if value is None:
      if "range" in op_attrs:
        del op_attrs["range"]
      return
    op_attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    op_attrs = self.operation.attributes
    del op_attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's first (and only declared) result."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_tid_z(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `ThreadIdZOp` and return its `.result`."""
  op = ThreadIdZOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class VoteSyncOp(_ods_ir.OpView):
  r"""
  The `vote.sync` op causes the executing thread to wait until all non-exited
  threads corresponding to membermask have executed `vote.sync` with the same
  qualifiers and the same membermask value before resuming execution.

  The vote operation kinds are:
  - `any`: True if the source predicate is True for some thread in membermask.
  - `all`: True if the source predicate is True for all non-exited threads in
    membermask.
  - `uni`: True if the source predicate has the same value in all non-exited
    threads in membermask.
  - `ballot`: In the ballot form, the destination result is a 32 bit integer.
    In this form, the predicate from each thread in membermask is copied into
    the corresponding bit position of the result, where the bit position
    corresponds to the thread's lane id.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-vote-sync)
  """

  OPERATION_NAME = "nvvm.vote.sync"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, mask, pred, kind, *, loc=None, ip=None):
    """Create the op.

    Args:
      res: Sole entry of the `results` list handed to `OpView.__init__`.
      mask: First operand.
      pred: Second operand.
      kind: Mandatory attribute. An `Attribute` is stored as-is; any other
        value goes through the `VoteSyncKindAttr` builder when that builder
        is registered, and is stored unchanged otherwise.
      loc: Forwarded to `OpView.__init__`; also used to pick the context.
      ip: Forwarded to `OpView.__init__`.
    """
    op_operands = [mask, pred]
    ctx = _ods_get_default_loc_context(loc)
    # Only raw values are run through the builder, and only if it exists.
    if not isinstance(kind, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains('VoteSyncKindAttr'):
      kind = _ods_ir.AttrBuilder.get('VoteSyncKindAttr')(kind, context=ctx)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={"kind": kind},
        results=[res],
        operands=op_operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def mask(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def pred(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 1 of the operation."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def kind(self) -> _ods_ir.Attribute:
    """The mandatory `kind` attribute stored on the operation."""
    op_attrs = self.operation.attributes
    return op_attrs["kind"]

  @kind.setter
  def kind(self, value: _ods_ir.Attribute):
    """Replace `kind`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["kind"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's first (and only declared) result."""
    op_results = self.operation.results
    return op_results[0]

def vote_sync(res, mask, pred, kind, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `VoteSyncOp` and return its `.result`."""
  op = VoteSyncOp(res=res, mask=mask, pred=pred, kind=kind, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class WMMALoadOp(_ods_ir.OpView):
  """Python view of the `nvvm.wmma.load` operation.

  The op is built with two operands (`ptr`, `stride`), one result, and six
  mandatory attributes (`m`, `n`, `k`, `layout`, `eltype`, `frag`).
  """

  OPERATION_NAME = "nvvm.wmma.load"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, ptr, stride, m, n, k, layout, eltype, frag, *, loc=None, ip=None):
    """Create the op.

    Each attribute argument that is already an `Attribute` is stored as-is.
    Any other value is converted with the attribute builder named below when
    that builder is registered, and stored unchanged otherwise.

    Args:
      res: Sole entry of the `results` list handed to `OpView.__init__`.
      ptr: First operand.
      stride: Second operand.
      m, n, k: Built with `I32Attr`.
      layout: Built with `MMALayoutAttr`.
      eltype: Built with `MMATypesAttr`.
      frag: Built with `MMAFragAttr`.
      loc: Forwarded to `OpView.__init__`; also used to pick the context.
      ip: Forwarded to `OpView.__init__`.
    """
    op_operands = [ptr, stride]
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    # (attribute name, supplied value, builder key), in insertion order.
    attr_specs = (
        ("m", m, 'I32Attr'),
        ("n", n, 'I32Attr'),
        ("k", k, 'I32Attr'),
        ("layout", layout, 'MMALayoutAttr'),
        ("eltype", eltype, 'MMATypesAttr'),
        ("frag", frag, 'MMAFragAttr'),
    )
    for attr_name, attr_value, builder_key in attr_specs:
      if not isinstance(attr_value, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains(builder_key):
        attr_value = _ods_ir.AttrBuilder.get(builder_key)(attr_value, context=ctx)
      attrs[attr_name] = attr_value
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=op_operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def ptr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def stride(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 1 of the operation."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def m(self) -> _ods_ir.IntegerAttr:
    """The mandatory `m` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["m"]

  @m.setter
  def m(self, value: _ods_ir.IntegerAttr):
    """Replace `m`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["m"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def n(self) -> _ods_ir.IntegerAttr:
    """The mandatory `n` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["n"]

  @n.setter
  def n(self, value: _ods_ir.IntegerAttr):
    """Replace `n`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["n"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def k(self) -> _ods_ir.IntegerAttr:
    """The mandatory `k` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["k"]

  @k.setter
  def k(self, value: _ods_ir.IntegerAttr):
    """Replace `k`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["k"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def layout(self) -> _ods_ir.Attribute:
    """The mandatory `layout` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["layout"]

  @layout.setter
  def layout(self, value: _ods_ir.Attribute):
    """Replace `layout`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["layout"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def eltype(self) -> _ods_ir.Attribute:
    """The mandatory `eltype` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["eltype"]

  @eltype.setter
  def eltype(self, value: _ods_ir.Attribute):
    """Replace `eltype`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["eltype"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def frag(self) -> _ods_ir.Attribute:
    """The mandatory `frag` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["frag"]

  @frag.setter
  def frag(self, value: _ods_ir.Attribute):
    """Replace `frag`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["frag"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's first (and only declared) result."""
    op_results = self.operation.results
    return op_results[0]

def wmma_load(res, ptr, stride, m, n, k, layout, eltype, frag, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `WMMALoadOp` and return its `.result`."""
  op = WMMALoadOp(
      res=res,
      ptr=ptr,
      stride=stride,
      m=m,
      n=n,
      k=k,
      layout=layout,
      eltype=eltype,
      frag=frag,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class WMMAMmaOp(_ods_ir.OpView):
  """Python view of the `nvvm.wmma.mma` operation.

  The op is built with one variadic operand group (`args`), one result, and
  seven mandatory attributes (`m`, `n`, `k`, `layoutA`, `layoutB`,
  `eltypeA`, `eltypeB`).
  """

  OPERATION_NAME = "nvvm.wmma.mma"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, m, n, k, layoutA, layoutB, eltypeA, eltypeB, args, *, loc=None, ip=None):
    """Create the op.

    Each attribute argument that is already an `Attribute` is stored as-is.
    Any other value is converted with the attribute builder named below when
    that builder is registered, and stored unchanged otherwise.

    Args:
      res: Sole entry of the `results` list handed to `OpView.__init__`.
      m, n, k: Built with `I32Attr`.
      layoutA, layoutB: Built with `MMALayoutAttr`.
      eltypeA, eltypeB: Built with `MMATypesAttr`.
      args: Passed through `_get_op_results_or_values`; the resulting items
        become the op's operands.
      loc: Forwarded to `OpView.__init__`; also used to pick the context.
      ip: Forwarded to `OpView.__init__`.
    """
    op_operands = [*_get_op_results_or_values(args)]
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    # (attribute name, supplied value, builder key), in insertion order.
    attr_specs = (
        ("m", m, 'I32Attr'),
        ("n", n, 'I32Attr'),
        ("k", k, 'I32Attr'),
        ("layoutA", layoutA, 'MMALayoutAttr'),
        ("layoutB", layoutB, 'MMALayoutAttr'),
        ("eltypeA", eltypeA, 'MMATypesAttr'),
        ("eltypeB", eltypeB, 'MMATypesAttr'),
    )
    for attr_name, attr_value, builder_key in attr_specs:
      if not isinstance(attr_value, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains(builder_key):
        attr_value = _ods_ir.AttrBuilder.get(builder_key)(attr_value, context=ctx)
      attrs[attr_name] = attr_value
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=op_operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def args(self) -> _ods_ir.OpOperandList:
    """The variadic operand group, which spans every operand of the op."""
    all_operands = self.operation.operands
    # One operand group in total, and it is the variadic one.
    group_len = len(all_operands) - 1 + 1
    return all_operands[0:group_len]

  @builtins.property
  def m(self) -> _ods_ir.IntegerAttr:
    """The mandatory `m` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["m"]

  @m.setter
  def m(self, value: _ods_ir.IntegerAttr):
    """Replace `m`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["m"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def n(self) -> _ods_ir.IntegerAttr:
    """The mandatory `n` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["n"]

  @n.setter
  def n(self, value: _ods_ir.IntegerAttr):
    """Replace `n`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["n"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def k(self) -> _ods_ir.IntegerAttr:
    """The mandatory `k` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["k"]

  @k.setter
  def k(self, value: _ods_ir.IntegerAttr):
    """Replace `k`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["k"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def layoutA(self) -> _ods_ir.Attribute:
    """The mandatory `layoutA` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["layoutA"]

  @layoutA.setter
  def layoutA(self, value: _ods_ir.Attribute):
    """Replace `layoutA`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["layoutA"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def layoutB(self) -> _ods_ir.Attribute:
    """The mandatory `layoutB` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["layoutB"]

  @layoutB.setter
  def layoutB(self, value: _ods_ir.Attribute):
    """Replace `layoutB`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["layoutB"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def eltypeA(self) -> _ods_ir.Attribute:
    """The mandatory `eltypeA` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["eltypeA"]

  @eltypeA.setter
  def eltypeA(self, value: _ods_ir.Attribute):
    """Replace `eltypeA`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["eltypeA"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def eltypeB(self) -> _ods_ir.Attribute:
    """The mandatory `eltypeB` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["eltypeB"]

  @eltypeB.setter
  def eltypeB(self, value: _ods_ir.Attribute):
    """Replace `eltypeB`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["eltypeB"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's first (and only declared) result."""
    op_results = self.operation.results
    return op_results[0]

def wmma_mma(res, m, n, k, layout_a, layout_b, eltype_a, eltype_b, args, *, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `WMMAMmaOp` and return its `.result`.

  The snake_case `layout_a`, `layout_b`, `eltype_a` and `eltype_b` arguments
  are forwarded as `layoutA`, `layoutB`, `eltypeA` and `eltypeB`.
  """
  op = WMMAMmaOp(
      res=res,
      m=m,
      n=n,
      k=k,
      layoutA=layout_a,
      layoutB=layout_b,
      eltypeA=eltype_a,
      eltypeB=eltype_b,
      args=args,
      loc=loc,
      ip=ip,
  )
  return op.result

@_ods_cext.register_operation(_Dialect)
class WMMAStoreOp(_ods_ir.OpView):
  """Python view of the `nvvm.wmma.store` operation.

  Operands are laid out as `ptr`, then the variadic `args` group, then
  `stride`. The op has no results and five mandatory attributes (`m`, `n`,
  `k`, `layout`, `eltype`).
  """

  OPERATION_NAME = "nvvm.wmma.store"

  _ODS_REGIONS = (0, True)

  def __init__(self, ptr, m, n, k, layout, eltype, args, stride, *, loc=None, ip=None):
    """Create the op.

    Each attribute argument that is already an `Attribute` is stored as-is.
    Any other value is converted with the attribute builder named below when
    that builder is registered, and stored unchanged otherwise.

    Args:
      ptr: First operand.
      m, n, k: Built with `I32Attr`.
      layout: Built with `MMALayoutAttr`.
      eltype: Built with `MMATypesAttr`.
      args: Passed through `_get_op_results_or_values`; the resulting items
        are placed between `ptr` and `stride` in the operand list.
      stride: Last operand.
      loc: Forwarded to `OpView.__init__`; also used to pick the context.
      ip: Forwarded to `OpView.__init__`.
    """
    op_operands = [ptr, *_get_op_results_or_values(args), stride]
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    # (attribute name, supplied value, builder key), in insertion order.
    attr_specs = (
        ("m", m, 'I32Attr'),
        ("n", n, 'I32Attr'),
        ("k", k, 'I32Attr'),
        ("layout", layout, 'MMALayoutAttr'),
        ("eltype", eltype, 'MMATypesAttr'),
    )
    for attr_name, attr_value, builder_key in attr_specs:
      if not isinstance(attr_value, _ods_ir.Attribute) and _ods_ir.AttrBuilder.contains(builder_key):
        attr_value = _ods_ir.AttrBuilder.get(builder_key)(attr_value, context=ctx)
      attrs[attr_name] = attr_value
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[],
        operands=op_operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def ptr(self) -> _ods_ir.Value:
    """Operand 0 of the operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def args(self) -> _ods_ir.OpOperandList:
    """The variadic operand group located between `ptr` and `stride`."""
    all_operands = self.operation.operands
    # Three operand groups in total; the variadic one absorbs the remainder.
    group_len = len(all_operands) - 3 + 1
    return all_operands[1:1 + group_len]

  @builtins.property
  def stride(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """The operand that follows the variadic `args` group."""
    all_operands = self.operation.operands
    group_len = len(all_operands) - 3 + 1
    # Static index 2, shifted by the variadic group's extra length.
    stride_index = 2 + group_len - 1
    return all_operands[stride_index]

  @builtins.property
  def m(self) -> _ods_ir.IntegerAttr:
    """The mandatory `m` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["m"]

  @m.setter
  def m(self, value: _ods_ir.IntegerAttr):
    """Replace `m`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["m"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def n(self) -> _ods_ir.IntegerAttr:
    """The mandatory `n` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["n"]

  @n.setter
  def n(self, value: _ods_ir.IntegerAttr):
    """Replace `n`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["n"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def k(self) -> _ods_ir.IntegerAttr:
    """The mandatory `k` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["k"]

  @k.setter
  def k(self, value: _ods_ir.IntegerAttr):
    """Replace `k`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["k"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def layout(self) -> _ods_ir.Attribute:
    """The mandatory `layout` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["layout"]

  @layout.setter
  def layout(self, value: _ods_ir.Attribute):
    """Replace `layout`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["layout"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

  @builtins.property
  def eltype(self) -> _ods_ir.Attribute:
    """The mandatory `eltype` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["eltype"]

  @eltype.setter
  def eltype(self, value: _ods_ir.Attribute):
    """Replace `eltype`; `None` is rejected with `ValueError`."""
    if value is not None:
      self.operation.attributes["eltype"] = value
      return
    raise ValueError("'None' not allowed as value for mandatory attributes")

def wmma_store(ptr, m, n, k, layout, eltype, args, stride, *, loc=None, ip=None) -> WMMAStoreOp:
  """Construct a `WMMAStoreOp` and return the op view itself."""
  op = WMMAStoreOp(
      ptr=ptr,
      m=m,
      n=n,
      k=k,
      layout=layout,
      eltype=eltype,
      args=args,
      stride=stride,
      loc=loc,
      ip=ip,
  )
  return op

@_ods_cext.register_operation(_Dialect)
class WarpDimOp(_ods_ir.OpView):
  """Python view of the `nvvm.read.ptx.sreg.nwarpid` operation.

  The op is built with no operands, a single result and an optional
  `range` attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.nwarpid"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    Args:
      res: Sole entry of the `results` list handed to `OpView.__init__`.
      range: Optional. An `Attribute` is stored as-is; any other non-None
        value goes through the `LLVM_ConstantRangeAttr` builder when that
        builder is registered, and is stored unchanged otherwise.
      loc: Forwarded to `OpView.__init__`; also used to pick the context.
      ip: Forwarded to `OpView.__init__`.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if range is not None:
      needs_build = (
          not isinstance(range, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if needs_build:
        attrs["range"] = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=ctx)
      else:
        attrs["range"] = range
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or `None` when the op does not carry one."""
    op_attrs = self.operation.attributes
    return op_attrs["range"] if "range" in op_attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set `range`; assigning `None` removes the attribute if present."""
    op_attrs = self.operation.attributes
    if value is None:
      if "range" in op_attrs:
        del op_attrs["range"]
      return
    op_attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    op_attrs = self.operation.attributes
    del op_attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's first (and only declared) result."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_nwarpid(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `WarpDimOp` and return its `.result`."""
  op = WarpDimOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class WarpIdOp(_ods_ir.OpView):
  """Python view of the `nvvm.read.ptx.sreg.warpid` operation.

  The op is built with no operands, a single result and an optional
  `range` attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.warpid"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    Args:
      res: Sole entry of the `results` list handed to `OpView.__init__`.
      range: Optional. An `Attribute` is stored as-is; any other non-None
        value goes through the `LLVM_ConstantRangeAttr` builder when that
        builder is registered, and is stored unchanged otherwise.
      loc: Forwarded to `OpView.__init__`; also used to pick the context.
      ip: Forwarded to `OpView.__init__`.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if range is not None:
      needs_build = (
          not isinstance(range, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if needs_build:
        attrs["range"] = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=ctx)
      else:
        attrs["range"] = range
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or `None` when the op does not carry one."""
    op_attrs = self.operation.attributes
    return op_attrs["range"] if "range" in op_attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set `range`; assigning `None` removes the attribute if present."""
    op_attrs = self.operation.attributes
    if value is None:
      if "range" in op_attrs:
        del op_attrs["range"]
      return
    op_attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    op_attrs = self.operation.attributes
    del op_attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's first (and only declared) result."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_warpid(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `WarpIdOp` and return its `.result`."""
  op = WarpIdOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class WarpSizeOp(_ods_ir.OpView):
  """Python view of the `nvvm.read.ptx.sreg.warpsize` operation.

  The op is built with no operands, a single result and an optional
  `range` attribute.
  """

  OPERATION_NAME = "nvvm.read.ptx.sreg.warpsize"

  _ODS_REGIONS = (0, True)

  def __init__(self, res, *, range=None, loc=None, ip=None):
    """Create the op.

    Args:
      res: Sole entry of the `results` list handed to `OpView.__init__`.
      range: Optional. An `Attribute` is stored as-is; any other non-None
        value goes through the `LLVM_ConstantRangeAttr` builder when that
        builder is registered, and is stored unchanged otherwise.
      loc: Forwarded to `OpView.__init__`; also used to pick the context.
      ip: Forwarded to `OpView.__init__`.
    """
    ctx = _ods_get_default_loc_context(loc)
    attrs = {}
    if range is not None:
      needs_build = (
          not isinstance(range, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains('LLVM_ConstantRangeAttr')
      )
      if needs_build:
        attrs["range"] = _ods_ir.AttrBuilder.get('LLVM_ConstantRangeAttr')(range, context=ctx)
      else:
        attrs["range"] = range
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=attrs,
        results=[res],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def range(self) -> _Optional[_ods_ir.Attribute]:
    """The `range` attribute, or `None` when the op does not carry one."""
    op_attrs = self.operation.attributes
    return op_attrs["range"] if "range" in op_attrs else None

  @range.setter
  def range(self, value: _Optional[_ods_ir.Attribute]):
    """Set `range`; assigning `None` removes the attribute if present."""
    op_attrs = self.operation.attributes
    if value is None:
      if "range" in op_attrs:
        del op_attrs["range"]
      return
    op_attrs["range"] = value

  @range.deleter
  def range(self):
    """Delete the `range` attribute from the operation."""
    op_attrs = self.operation.attributes
    del op_attrs["range"]

  @builtins.property
  def res(self) -> _ods_ir.OpResult:
    """The op's first (and only declared) result."""
    op_results = self.operation.results
    return op_results[0]

def read_ptx_sreg_warpsize(res, *, range=None, loc=None, ip=None) -> _ods_ir.OpResult:
  """Construct a `WarpSizeOp` and return its `.result`."""
  op = WarpSizeOp(res=res, range=range, loc=loc, ip=ip)
  return op.result

@_ods_cext.register_operation(_Dialect)
class WgmmaFenceAlignedOp(_ods_ir.OpView):
  r"""
  Enforce an ordering of register accesses between warpgroup level matrix
  multiplication and other operations.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions-wgmma-fence)
  """

  OPERATION_NAME = "nvvm.wgmma.fence.aligned"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, loc=None, ip=None):
    """Create the op with no operands, attributes or results.

    Args:
      loc: Forwarded to `OpView.__init__`.
      ip: Forwarded to `OpView.__init__`.
    """
    # The returned context is not needed here; the call is kept so the
    # constructor performs the same steps as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

def wgmma_fence_aligned(*, loc=None, ip=None) -> WgmmaFenceAlignedOp:
  """Construct a `WgmmaFenceAlignedOp` and return the op view itself."""
  op = WgmmaFenceAlignedOp(loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class WgmmaGroupSyncAlignedOp(_ods_ir.OpView):
  r"""
  Commits all prior uncommitted warpgroup level matrix multiplication operations.

  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions-wgmma-commit-group)
  """

  OPERATION_NAME = "nvvm.wgmma.commit.group.sync.aligned"

  _ODS_REGIONS = (0, True)

  def __init__(self, *, loc=None, ip=None):
    """Create the op with no operands, attributes or results.

    Args:
      loc: Forwarded to `OpView.__init__`.
      ip: Forwarded to `OpView.__init__`.
    """
    # The returned context is not needed here; the call is kept so the
    # constructor performs the same steps as before.
    _ods_get_default_loc_context(loc)
    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

def wgmma_commit_group_sync_aligned(*, loc=None, ip=None) -> WgmmaGroupSyncAlignedOp:
  """Construct a `WgmmaGroupSyncAlignedOp` and return the op view itself."""
  op = WgmmaGroupSyncAlignedOp(loc=loc, ip=ip)
  return op

@_ods_cext.register_operation(_Dialect)
class WgmmaMmaAsyncOp(_ods_ir.OpView):
  r"""
  The warpgroup (128 threads) level matrix multiply and accumulate operation 
  has either of the following forms, where matrix D is called accumulator:
    D = A * B + D
    D = A * B, where the input from accumulator D is disabled.
  
  Supported shapes:  
  ```
  |--------------|--------------|------------|--------------|---------------|
  |              |              |            |              |f16+=e4m3*e4m3 |
  |              |              |            |              |f16+=e5m2*e5m2 |
  |f32+=tf32*tf32|f16+=f16 *f16 | s32+=s8*s8 |s32 += b1 * b1|f16+=e5m2*e4m3 |
  |              |f32+=f16 *f16 | s32+=u8*u8 |              |f16+=e4m3*e5m2 |
  |              |f32+=bf16*bf16| s32+=u8*u8 |              |f16+=e4m3*e5m2 |
  |              |f32+=bf16*bf16| s32+=s8*u8 |              |f32+=e4m3*e4m3 |
  |              |              | s32+=u8*s8 |              |f32+=e5m2*e5m2 |
  |              |              |            |              |f32+=e4m3*e5m2 |
  |              |              |            |              |f32+=e4m3*e5m2 |
  |--------------|--------------|------------|--------------|---------------|
  |   .m64n8k8   |  .m64n8k16   | .m64n8k32  | .m64n8k256   | .m64n8k32     |
  |   .m64n16k8  |  .m64n16k16  | .m64n16k32 | .m64n16k256  | .m64n16k32    |
  |   .m64n24k8  |  .m64n24k16  | .m64n24k32 | .m64n24k256  | .m64n24k32    |
  |   .m64n32k8  |  .m64n32k16  | .m64n32k32 | .m64n32k256  | .m64n32k32    |
  |   .m64n40k8  |  .m64n40k16  | .m64n48k32 | .m64n48k256  | .m64n40k32    |
  |   .m64n48k8  |  .m64n48k16  | .m64n64k32 | .m64n64k256  | .m64n48k32    |
  |   .m64n56k8  |  .m64n56k16  | .m64n80k32 | .m64n80k256  | .m64n56k32    |
  |   .m64n64k8  |  .m64n64k16  | .m64n96k32 | .m64n96k256  | .m64n64k32    |
  |   .m64n72k8  |  .m64n72k16  | .m64n112k32| .m64n112k256 | .m64n72k32    |
  |   .m64n80k8  |  .m64n80k16  | .m64n128k32| .m64n128k256 | .m64n80k32    |
  |   .m64n88k8  |  .m64n88k16  | .m64n144k32| .m64n144k256 | .m64n88k32    |
  |   .m64n96k8  |  .m64n96k16  | .m64n160k32| .m64n160k256 | .m64n96k32    |
  |   .m64n104k8 |  .m64n104k16 | .m64n176k32| .m64n176k256 | .m64n104k32   |
  |   .m64n112k8 |  .m64n112k16 | .m64n192k32| .m64n192k256 | .m64n112k32   |
  |   .m64n120k8 |  .m64n120k16 | .m64n208k32| .m64n208k256 | .m64n120k32   |
  |   .m64n128k8 |  .m64n128k16 | .m64n224k32| .m64n224k256 | .m64n128k32   |
  |   .m64n136k8 |  .m64n136k16 | .m64n240k32| .m64n240k256 | .m64n136k32   |
  |   .m64n144k8 |  .m64n144k16 | .m64n256k32| .m64n256k256 | .m64n144k32   |
  |   .m64n152k8 |  .m64n152k16 |            |              | .m64n152k32   |
  |   .m64n160k8 |  .m64n160k16 |            |              | .m64n160k32   |
  |   .m64n168k8 |  .m64n168k16 |            |              | .m64n168k32   |
  |   .m64n176k8 |  .m64n176k16 |            |              | .m64n176k32   |
  |   .m64n184k8 |  .m64n184k16 |            |              | .m64n184k32   |
  |   .m64n192k8 |  .m64n192k16 |            |              | .m64n192k32   |
  |   .m64n200k8 |  .m64n200k16 |            |              | .m64n200k32   |
  |   .m64n208k8 |  .m64n208k16 |            |              | .m64n208k32   |
  |   .m64n216k8 |  .m64n216k16 |            |              | .m64n216k32   |
  |   .m64n224k8 |  .m64n224k16 |            |              | .m64n224k32   |
  |   .m64n232k8 |  .m64n232k16 |            |              | .m64n232k32   |
  |   .m64n240k8 |  .m64n240k16 |            |              | .m64n240k32   |
  |   .m64n248k8 |  .m64n248k16 |            |              | .m64n248k32   |
  |   .m64n256k8 |  .m64n256k16 |            |              | .m64n256k32   |
  |--------------|--------------|------------|--------------|---------------|
  ```
  
  
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions)
  """

  # NOTE(review): the type-combination header of the table above lists a few
  # rows twice (`f32+=bf16*bf16`, `s32+=u8*u8`, `f16+=e4m3*e5m2`,
  # `f32+=e4m3*e5m2`). The text is reproduced as-is; confirm against the
  # dialect's op description whether a different combination was intended.

  OPERATION_NAME = "nvvm.wgmma.mma_async"

  # Forwarded unchanged as the second positional argument of the base
  # constructor.
  _ODS_REGIONS = (0, True)

  def __init__(
      self,
      results_,
      inouts,
      descriptorA,
      descriptorB,
      shape,
      typeA,
      typeB,
      typeD,
      scaleD,
      scaleA,
      scaleB,
      layoutA,
      layoutB,
      *,
      satfinite=None,
      loc=None,
      ip=None,
  ):
    """Build an `nvvm.wgmma.mma_async` op.

    Every attribute argument is stored unchanged when it already is an
    `_ods_ir.Attribute`, or when no builder is registered under the
    corresponding name; otherwise it is converted by the registered
    `_ods_ir.AttrBuilder`, using the context derived from `loc`.

    Args:
      results_: Sole entry of the `results` list handed to the base
        constructor (presumably the result type -- confirm against OpView).
      inouts: Operand 0.
      descriptorA: Operand 1.
      descriptorB: Operand 2.
      shape: Mandatory attribute, builder name `NVVM_MMAShapeAttr`.
      typeA: Mandatory attribute, builder name `WGMMATypesAttr`.
      typeB: Mandatory attribute, builder name `WGMMATypesAttr`.
      typeD: Mandatory attribute, builder name `WGMMATypesAttr`.
      scaleD: Mandatory attribute, builder name `WGMMAScaleOutAttr`.
      scaleA: Mandatory attribute, builder name `WGMMAScaleInAttr`.
      scaleB: Mandatory attribute, builder name `WGMMAScaleInAttr`.
      layoutA: Mandatory attribute, builder name `MMALayoutAttr`.
      layoutB: Mandatory attribute, builder name `MMALayoutAttr`.
      satfinite: Optional attribute, builder name `MMAIntOverflowAttr`; only
        recorded when it is not None.
      loc: Optional location; forwarded to the base constructor.
      ip: Optional insertion point; forwarded to the base constructor.
    """
    op_operands = [inouts, descriptorA, descriptorB]
    ctx = _ods_get_default_loc_context(loc)

    # (attribute name, caller-supplied value, registered builder name), listed
    # in the order the attributes are recorded.
    attr_specs = [
        ("shape", shape, 'NVVM_MMAShapeAttr'),
        ("typeA", typeA, 'WGMMATypesAttr'),
        ("typeB", typeB, 'WGMMATypesAttr'),
        ("typeD", typeD, 'WGMMATypesAttr'),
        ("scaleD", scaleD, 'WGMMAScaleOutAttr'),
        ("scaleA", scaleA, 'WGMMAScaleInAttr'),
        ("scaleB", scaleB, 'WGMMAScaleInAttr'),
        ("layoutA", layoutA, 'MMALayoutAttr'),
        ("layoutB", layoutB, 'MMALayoutAttr'),
    ]
    if satfinite is not None:
      attr_specs.append(("satfinite", satfinite, 'MMAIntOverflowAttr'))

    op_attributes = {}
    for attr_name, attr_value, builder_name in attr_specs:
      # The isinstance test runs first so the builder registry is only
      # consulted for values that are not attributes already.
      if (not isinstance(attr_value, _ods_ir.Attribute)
          and _ods_ir.AttrBuilder.contains(builder_name)):
        attr_value = _ods_ir.AttrBuilder.get(builder_name)(attr_value, context=ctx)
      op_attributes[attr_name] = attr_value

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes=op_attributes,
        results=[results_],
        operands=op_operands,
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def inouts(self) -> _ods_ir.Value:
    """Operand 0 of the underlying operation."""
    op_operands = self.operation.operands
    return op_operands[0]

  @builtins.property
  def descriptorA(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 1 of the underlying operation."""
    op_operands = self.operation.operands
    return op_operands[1]

  @builtins.property
  def descriptorB(self) -> _ods_ir.Value[_ods_ir.IntegerType]:
    """Operand 2 of the underlying operation."""
    op_operands = self.operation.operands
    return op_operands[2]

  @builtins.property
  def shape(self) -> _ods_ir.Attribute:
    """The mandatory `shape` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["shape"]

  @shape.setter
  def shape(self, value: _ods_ir.Attribute):
    """Replace `shape`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    op_attrs = self.operation.attributes
    op_attrs["shape"] = value

  @builtins.property
  def typeA(self) -> _ods_ir.Attribute:
    """The mandatory `typeA` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["typeA"]

  @typeA.setter
  def typeA(self, value: _ods_ir.Attribute):
    """Replace `typeA`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    op_attrs = self.operation.attributes
    op_attrs["typeA"] = value

  @builtins.property
  def typeB(self) -> _ods_ir.Attribute:
    """The mandatory `typeB` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["typeB"]

  @typeB.setter
  def typeB(self, value: _ods_ir.Attribute):
    """Replace `typeB`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    op_attrs = self.operation.attributes
    op_attrs["typeB"] = value

  @builtins.property
  def typeD(self) -> _ods_ir.Attribute:
    """The mandatory `typeD` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["typeD"]

  @typeD.setter
  def typeD(self, value: _ods_ir.Attribute):
    """Replace `typeD`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    op_attrs = self.operation.attributes
    op_attrs["typeD"] = value

  @builtins.property
  def scaleD(self) -> _ods_ir.Attribute:
    """The mandatory `scaleD` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["scaleD"]

  @scaleD.setter
  def scaleD(self, value: _ods_ir.Attribute):
    """Replace `scaleD`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    op_attrs = self.operation.attributes
    op_attrs["scaleD"] = value

  @builtins.property
  def scaleA(self) -> _ods_ir.Attribute:
    """The mandatory `scaleA` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["scaleA"]

  @scaleA.setter
  def scaleA(self, value: _ods_ir.Attribute):
    """Replace `scaleA`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    op_attrs = self.operation.attributes
    op_attrs["scaleA"] = value

  @builtins.property
  def scaleB(self) -> _ods_ir.Attribute:
    """The mandatory `scaleB` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["scaleB"]

  @scaleB.setter
  def scaleB(self, value: _ods_ir.Attribute):
    """Replace `scaleB`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    op_attrs = self.operation.attributes
    op_attrs["scaleB"] = value

  @builtins.property
  def layoutA(self) -> _ods_ir.Attribute:
    """The mandatory `layoutA` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["layoutA"]

  @layoutA.setter
  def layoutA(self, value: _ods_ir.Attribute):
    """Replace `layoutA`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    op_attrs = self.operation.attributes
    op_attrs["layoutA"] = value

  @builtins.property
  def layoutB(self) -> _ods_ir.Attribute:
    """The mandatory `layoutB` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["layoutB"]

  @layoutB.setter
  def layoutB(self, value: _ods_ir.Attribute):
    """Replace `layoutB`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    op_attrs = self.operation.attributes
    op_attrs["layoutB"] = value

  @builtins.property
  def satfinite(self) -> _Optional[_ods_ir.Attribute]:
    """The optional `satfinite` attribute, or None when it is not set."""
    if "satfinite" in self.operation.attributes:
      return self.operation.attributes["satfinite"]
    return None

  @satfinite.setter
  def satfinite(self, value: _Optional[_ods_ir.Attribute]):
    """Set `satfinite`; assigning None removes the attribute if present."""
    if value is None:
      if "satfinite" in self.operation.attributes:
        del self.operation.attributes["satfinite"]
      return
    self.operation.attributes["satfinite"] = value

  @satfinite.deleter
  def satfinite(self):
    """Delete the `satfinite` entry from the op's attributes."""
    op_attrs = self.operation.attributes
    del op_attrs["satfinite"]

  @builtins.property
  def results_(self) -> _ods_ir.OpResult:
    """Result 0 of the underlying operation."""
    op_results = self.operation.results
    return op_results[0]

def wgmma_mma_async(
    results_,
    inouts,
    descriptor_a,
    descriptor_b,
    shape,
    type_a,
    type_b,
    type_d,
    scale_d,
    scale_a,
    scale_b,
    layout_a,
    layout_b,
    *,
    satfinite=None,
    loc=None,
    ip=None,
) -> _ods_ir.OpResult:
  """Construct a `WgmmaMmaAsyncOp` and return its `.result`.

  The snake_case parameters map onto the op constructor's camelCase keywords
  (`descriptor_a` -> `descriptorA`, `type_d` -> `typeD`, and so on); all other
  arguments are passed through under the same name.
  """
  mma_op = WgmmaMmaAsyncOp(
      results_=results_,
      inouts=inouts,
      descriptorA=descriptor_a,
      descriptorB=descriptor_b,
      shape=shape,
      typeA=type_a,
      typeB=type_b,
      typeD=type_d,
      scaleD=scale_d,
      scaleA=scale_a,
      scaleB=scale_b,
      layoutA=layout_a,
      layoutB=layout_b,
      satfinite=satfinite,
      loc=loc,
      ip=ip,
  )
  return mma_op.result

@_ods_cext.register_operation(_Dialect)
class WgmmaWaitGroupSyncOp(_ods_ir.OpView):
  r"""
  Signal the completion of a preceding warpgroup operation.
  
  [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions-wgmma-wait-group)
  """

  OPERATION_NAME = "nvvm.wgmma.wait.group.sync.aligned"

  # Forwarded unchanged as the second positional argument of the base
  # constructor.
  _ODS_REGIONS = (0, True)

  def __init__(self, group, *, loc=None, ip=None):
    """Build the op with a single mandatory `group` attribute.

    Args:
      group: Stored unchanged when it already is an `_ods_ir.Attribute`, or
        when no builder is registered under `I64Attr`; otherwise converted by
        that registered builder using the context derived from `loc`.
      loc: Optional location; forwarded to the base constructor.
      ip: Optional insertion point; forwarded to the base constructor.
    """
    ctx = _ods_get_default_loc_context(loc)

    group_attr = group
    # The isinstance test runs first so the builder registry is only consulted
    # for values that are not attributes already.
    if (not isinstance(group, _ods_ir.Attribute)
        and _ods_ir.AttrBuilder.contains('I64Attr')):
      group_attr = _ods_ir.AttrBuilder.get('I64Attr')(group, context=ctx)

    super().__init__(
        self.OPERATION_NAME,
        self._ODS_REGIONS,
        self._ODS_OPERAND_SEGMENTS,
        self._ODS_RESULT_SEGMENTS,
        attributes={"group": group_attr},
        results=[],
        operands=[],
        successors=None,
        regions=None,
        loc=loc,
        ip=ip,
    )

  @builtins.property
  def group(self) -> _ods_ir.IntegerAttr:
    """The mandatory `group` attribute."""
    op_attrs = self.operation.attributes
    return op_attrs["group"]

  @group.setter
  def group(self, value: _ods_ir.IntegerAttr):
    """Replace `group`; raises ValueError when `value` is None."""
    if value is None:
      raise ValueError("'None' not allowed as value for mandatory attributes")
    op_attrs = self.operation.attributes
    op_attrs["group"] = value

def wgmma_wait_group_sync_aligned(group, *, loc=None, ip=None) -> WgmmaWaitGroupSyncOp:
  """Construct a `WgmmaWaitGroupSyncOp` and return the op view itself.

  Args:
    group: Passed through as the op's `group` argument.
    loc: Optional location, passed through to the op constructor.
    ip: Optional insertion point, passed through to the op constructor.
  """
  wait_op = WgmmaWaitGroupSyncOp(group=group, loc=loc, ip=ip)
  return wait_op