
    xpiz
                         d dl mZ d dlmZmZ g dZ G d de      Zedd       Zedd       Zedd
       Z	ed	ddd       Z
y)    )SwizzledSharedLayout)builtin_unwrap_if_constexpr)arriveinit
invalidateMBarrierLayoutwaitc                   .     e Zd ZdZddedef fdZ xZS )r	   z
    Layout for mbarrier synchronization in Ampere and later architectures.

    Args:
        ctas_per_cga (int): CTAs per CGA grouping. Defaults to 1.
        cta_split_num (int): CTA split factor. Defaults to 1.
    ctas_per_cgacta_split_numc           	      :    t         |   ddddg|g|gdg       y )N   r   )vec	per_phase	max_phaseorderr   r   	cta_order)super__init__)selfr   r   	__class__s      {/opt/services/ai/voice_agent/venv/lib/python3.12/site-packages/triton/experimental/gluon/language/nvidia/ampere/mbarrier.pyr   zMBarrierLayout.__init__   s2    #&(/c 	 	
    )r   r   )__name__
__module____qualname____doc__intr   __classcell__)r   s   @r   r	   r	      s     	
S 	
S 	
 	
r   r	   Nc                 f    t        |      }|j                  j                  | j                  |       y)z
    Initialize an mbarrier with a specified count.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to initialize.
        count (int): The initial count for the barrier.
    N)r   buildercreate_mbarrier_inithandle)mbarriercount	_semantics      r   r   r      s(     !'E**8??EBr   c                 N    |j                   j                  | j                         y)z
    Invalidate an mbarrier, resetting its state.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to invalidate.
    N)r"   create_mbarrier_invalr$   )r%   r'   s     r   r   r   )   s     ++HOO<r   Tc                     |j                  |      }|j                  |      }|D cg c]  }|j                   }}|j                  j                  | j                  |j                  |j                  |       yc c}w )a  
    Wait until the mbarrier object completes its current phase.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to wait on.
        phase (int): The phase index to wait for.
        pred (bool): Predicate. Operation is skipped if predicate is False. Defaults to True.
        deps (Sequence[shared_memory_descriptor]): Dependent allocations barrier is waiting on. Used to track liveness of dependent allocations. Defaults to ().
    N)	to_tensorr$   r"   create_mbarrier_wait)r%   phasepreddepsr'   xs         r   r
   r
   4   sg     &Et$D"#AHH#D#**8??ELL$++W[\ $s   A9)r.   r'   c                    d}|j                  |      }|j                  j                  | j                  ||j                         y)a  
    Arrive on an mbarrier, signaling that a thread has reached the barrier.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to arrive on.
        pred (bool): Predicate. Operation is skipped if predicate is False. Defaults to True.
    r   N)r+   r"   create_mbarrier_arriver$   )r%   r.   r'   r&   s       r   r   r   E   s9     Et$D,,X__eT[[Qr   )N)T N)+triton.experimental.gluon.language._layoutsr   (triton.experimental.gluon.language._corer   r   __all__r	   r   r   r
   r   r3   r   r   <module>r7      sx    L R
D
) 
* 		C 		C 	= 	= 	] 	]  	!T 
R 	
Rr   