
    bi	                    H    d Z ddlmZ ddlZddlZddlmZ d Z	d Z
d Zd Zy)	uc  Implementation of HyperLogLog

This implements the HyperLogLog algorithm for cardinality estimation, found
in

    Philippe Flajolet, Éric Fusy, Olivier Gandouet and Frédéric Meunier.
        "HyperLogLog: the analysis of a near-optimal cardinality estimation
        algorithm". 2007 Conference on Analysis of Algorithms. Nice, France
        (2007)

    )annotationsN)hash_pandas_objectc                    t         j                  j                  | dt        j                  d      z        }|j	                  d      j                  t              }d|j                  d      z
  S )zGCompute the position of the first nonzero bit for each int in an array.       axis!   )npbitwise_andouterarangecumsumastypeboolsum)abitss     U/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/hyperloglog.pycompute_first_bitr      sX     >>1		"#56D;;A;%%d+Da       c                ,   d|cxk  rdk  st        d       t        d      d|z
  }d|z  }t        | d      }t        |t        j                        r|j
                  }|j                  t        j                        }||z	  }t        |      }t        j                  ||d      }|j                  d	      j                         d
   }|j                  t        j                  |      d      j                  j                  t        j                         S )N      zb should be between 8 and 16r   r   F)index)j	first_bitr   r   r   )
fill_value)
ValueErrorr   
isinstancepdSeries_valuesr   r   uint32r   	DataFramegroupbymaxreindexr   valuesuint8)	objbnum_bits_discardedmhashesr   r   dfseriess	            r   compute_hll_arrayr2      s     <R<788 788a	QA  51F&"))$]]299%F 	$$A!&)I 
AI6	7BZZ_  ";/F >>"))A,1>5<<CCBHHMMr   c                l    d|z  }| j                  t        |       |z  |      } | j                  d      S )Nr   r   r   )reshapelenr'   )Msr,   r.   s      r   reduce_stater7   7   s5    	QA 
SW\A	&B66q6>r   c                H   d|z  }t        | |      }ddd|z  z   z  }||z  d|j                  d       z  j                         z  |z  }|d|z  k  r0|dk(  j                         }|r|t        j                  ||z        z  S |dkD  rd	t        j
                  | d
z        z  S |S )Nr   gZӼ?g$C?g       @f8g      @r   gAl     l        )r7   r   r   r   loglog1p)r6   r,   r.   MalphaEVs          r   estimate_countr@   @   s    	QA 	RA a%!)m$E	Sahhtn--2244q8A
 	37{!VLLNrvva!e}$$<"((A2:...Hr   )__doc__
__future__r   numpyr   pandasr!   pandas.utilr   r   r2   r7   r@    r   r   <module>rG      s-   
 #   *!N6r   