
    bi&
                     X   d dl Z d dlZd dlmZ  e       sdej                  fdZyd dlZd dlmZ	  ej                   ej                  i d       ej                  i d       ej                  i d       ej                  i d	       ej                  i d
       ej                  i dd	       ej                  i dd	       ej                  i dd	       ej                  i d	d	       ej                  i d
d	       ej                  i d       ej                  i d       ej                  i d       ej                  i d	      gdg      ej                  de	j                  de	j                  de	j                  de	j                  fd              Zdej                  fdZy)    N)is_triton_availablexc                      y )N )r   s    p/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/bitsandbytes/triton/quantize_columnwise_and_transpose.py!quantize_columnwise_and_transposer   	   s           )
num_stages            )r   	num_warps)r   
n_elements)configskeyMN
BLOCK_SIZEP2c                    t        j                  d      }|}	t        j                  d|      }
|
|k  }|
|z  }|	|z   }t        j                  | |z   |      }t        j                  |      }t        j
                  t        j                  ||d      d      }t         j                  j                  d||z  z        }||z  }||
z   }t        j                  ||z   ||       t        j                  ||z   |       y )Nr   )axis)maskg     _@)
tl
program_idarangeloadabsmaxwhere	libdevicellrintstore)x_ptr
output_ptroutput_maxsr   r   r   r   r   pidblock_start	p2_arangep2_arange_maskr   offsetsr   abs_xmax_valoutput	new_startnew_offsetss                       r   "_quantize_columnwise_and_transposer2      s    < mm#IIa$	"QQ&GGEGO.9q	&&.%;!D$$Ua'k%:;!G	)+
k)6G
s"G,r	   c           
         | j                   \  }}t        j                  ||| j                  t        j                        }t        j                  | j                   d   | j                  t        j
                        }t        dt        j                  t        j                  |            z        }| j                  r|j                  sJ |j                         fd}t        |   | ||||||       ||fS )N)devicedtyper
   r   c                 8    t        j                  | d         fS )Nr   )tritoncdiv)metar   s    r   <lambda>z3quantize_columnwise_and_transpose.<locals>.<lambda>I   s    V[[T,5GHJ r	   )r   r   )shapetorchemptyr4   int8float16intmathceillog2is_cudanumelr2   )r   r   r   r/   r'   r   gridr   s          @r   r   r   @   s    ww1Q!((%**Ekk!''!*QXXU]]Styy1./0yyV^^++\\^
J*40FKUVXYfglno{""r	   )rA   r<    bitsandbytes.triton.triton_utilsr   Tensorr   r7   triton.languagelanguager   autotuneConfigjit	constexprr2   r   r	   r   <module>rO      s     @U\\   
 V__FMM"+FMM"+FMM"+FMM"+FMM",FMM"a8FMM"a8FMM"a8FMM"a8FMM"q9FMM"*FMM"*FMM"*FMM"*
  N#& ZZ-
 <<- <<- LL- LL- '(-4#U\\ #r	   