
    ukiB                        d Z ddlmZ ddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ 	 	 	 	 d&dededededededeeef   fdZ	 	 	 	 d&dededededededeeef   fdZd Zd Z d Z!ddd Z"d! Z#d" Z$ ejJ                  d#      Z&de&_'        e&jQ                   eejR                  e&             e&jU                  e        ejV                  e& ee"d              ejV                  e&e"d$%       e#ejX                  e&<   e$ejZ                  e&<   y)'a  ANN (Approximate Nearest Neighbor) computes top-k with a configurable recall rate.

This package only optimizes the TPU backend. For other device types it fallbacks
to sort and slice.

Usage::

  import functools
  import jax

  # MIPS := maximal inner product search
  # Inputs:
  #   qy: f32[qy_size, feature_dim]
  #   db: f32[db_size, feature_dim]
  #
  # Returns:
  #   (f32[qy_size, k], i32[qy_size, k])
  @functools.partial(jax.jit, static_argnames=["k", "recall_target"])
  def mips(qy, db, k=10, recall_target=0.95):
    dists = jax.lax.dot(qy, db.transpose())
    # Computes max_k along the last dimension
    # returns (f32[qy_size, k], i32[qy_size, k])
    return jax.lax.approx_max_k(dists, k=k, recall_target=recall_target)

  # Multi-core example
  # Inputs:
  #   qy: f32[num_devices, qy_size, feature_dim]
  #   db: f32[num_devices, per_device_db_size, feature_dim]
  #   db_offset: i32[num_devices]
  #   db_size = num_devices * per_device_db_size
  #
  # Returns:
  #   (f32[qy_size, num_devices, k], i32[qy_size, num_devices, k])
  @functools.partial(
      jax.pmap,
      # static args: db_size, k, recall_target
      static_broadcasted_argnums=[3, 4, 5],
      out_axes=(1, 1))
  def pmap_mips(qy, db, db_offset, db_size, k, recall_target):
    dists = jax.lax.dot(qy, db.transpose())
    dists, neighbors = jax.lax.approx_max_k(
        dists, k=k, recall_target=recall_target,
        reduction_input_size_override=db_size)
    return (dists, neighbors + db_offset)

  # i32[qy_size, num_devices, k]
  pmap_neighbors = pmap_mips(qy, db, db_offset, db_size, 10, 0.95)[1]
  # i32[qy_size, num_devices * k]
  neighbors = jax.lax.collapse(pmap_neighbors, start_dimension=1, stop_dimension=3)

Todos::

  * On host top-k aggregation
  * Inaccurate but fast differentiation

    )partialN)ad_util)core)dispatch)dtypes)take_along_axis)ad)batching)mlir)_jax)ir)func)hlo)ArrayToperandkreduction_dimensionrecall_targetreduction_input_size_overrideaggregate_to_topkreturnc           	      :    t         j                  | |||d||      S )a  Returns max ``k`` values and their indices of the ``operand`` in an approximate manner.

  See https://arxiv.org/abs/2206.14286 for the algorithm details.

  Args:
    operand : Array to search for max-k. Must be a floating number type.
    k : Specifies the number of max-k.
    reduction_dimension : Integer dimension along which to search. Default: -1.
    recall_target : Recall target for the approximation.
    reduction_input_size_override : When set to a positive value, it overrides
      the size determined by ``operand[reduction_dim]`` for evaluating the
      recall. This option is useful when the given ``operand`` is only a subset
      of the overall computation in SPMD or distributed pipelines, where the
      true input size cannot be deferred by the operand shape.
    aggregate_to_topk : When true, aggregates approximate results to the top-k
      in sorted order. When false, returns the approximate results unsorted. In
      this case, the number of the approximate results is implementation defined
      and is greater or equal to the specified ``k``.

  Returns:
    Tuple of two arrays. The arrays are the max ``k`` values and the
    corresponding indices along the ``reduction_dimension`` of the input
    ``operand``. The arrays' dimensions are the same as the input ``operand``
    except for the ``reduction_dimension``: when ``aggregate_to_topk`` is true,
    the reduction dimension is ``k``; otherwise, it is greater equals to ``k``
    where the size is implementation-defined.

  We encourage users to wrap ``approx_max_k`` with jit. See the following
  example for maximal inner production search (MIPS):

  >>> import functools
  >>> import jax
  >>> import numpy as np
  >>> @functools.partial(jax.jit, static_argnames=["k", "recall_target"])
  ... def mips(qy, db, k=10, recall_target=0.95):
  ...   dists = jax.lax.dot(qy, db.transpose())
  ...   # returns (f32[qy_size, k], i32[qy_size, k])
  ...   return jax.lax.approx_max_k(dists, k=k, recall_target=recall_target)
  >>>
  >>> qy = jax.numpy.array(np.random.rand(50, 64))
  >>> db = jax.numpy.array(np.random.rand(1024, 64))
  >>> dot_products, neighbors = mips(qy, db, k=10)
  Tr   r   r   is_max_kr   r   approx_top_k_pbindr   r   r   r   r   r   s         K/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/_src/lax/ann.pyapprox_max_kr    \   s2    b 
			-!$A) 
 
+ +    c           	      :    t         j                  | |||d||      S )a	  Returns min ``k`` values and their indices of the ``operand`` in an approximate manner.

  See https://arxiv.org/abs/2206.14286 for the algorithm details.

  Args:
    operand : Array to search for min-k. Must be a floating number type.
    k : Specifies the number of min-k.
    reduction_dimension: Integer dimension along which to search. Default: -1.
    recall_target: Recall target for the approximation.
    reduction_input_size_override : When set to a positive value, it overrides
      the size determined by ``operand[reduction_dim]`` for evaluating the
      recall. This option is useful when the given operand is only a subset of
      the overall computation in SPMD or distributed pipelines, where the true
      input size cannot be deferred by the ``operand`` shape.
    aggregate_to_topk : When true, aggregates approximate results to the top-k
      in sorted order. When false, returns the approximate results unsorted. In
      this case, the number of the approximate results is implementation defined
      and is greater or equal to the specified ``k``.

  Returns:
    Tuple of two arrays. The arrays are the least ``k`` values and the
    corresponding indices along the ``reduction_dimension`` of the input
    ``operand``.  The arrays' dimensions are the same as the input ``operand``
    except for the ``reduction_dimension``: when ``aggregate_to_topk`` is true,
    the reduction dimension is ``k``; otherwise, it is greater equals to ``k``
    where the size is implementation-defined.

  We encourage users to wrap ``approx_min_k`` with jit. See the following example
  for nearest neighbor search over the squared l2 distance:

  >>> import functools
  >>> import jax
  >>> import numpy as np
  >>> @functools.partial(jax.jit, static_argnames=["k", "recall_target"])
  ... def l2_ann(qy, db, half_db_norms, k=10, recall_target=0.95):
  ...   dists = half_db_norms - jax.lax.dot(qy, db.transpose())
  ...   return jax.lax.approx_min_k(dists, k=k, recall_target=recall_target)
  >>>
  >>> qy = jax.numpy.array(np.random.rand(50, 64))
  >>> db = jax.numpy.array(np.random.rand(1024, 64))
  >>> half_db_norm_sq = jax.numpy.linalg.norm(db, axis=1)**2 / 2
  >>> dists, neighbors = l2_ann(qy, db, half_db_norm_sq, k=10)

  In the example above, we compute ``db^2/2 - dot(qy, db^T)`` instead of
  ``qy^2 - 2 dot(qy, db^T) + db^2`` for performance reason. The former uses less
  arithmetic and produces the same set of neighbors.
  Fr   r   r   s         r   approx_min_kr#      s2    j 
			-!$A) 
 
+ +r!   c                   |dk  rt        d|       t        | j                        dk(  r$t        dj	                  | j                              t        | j                        }||   |k  rt        dj	                  ||   |            t        j                  | j                  t        j                        st        d      ||   }|r|||<   nSt        j                  ||f      r*t        j                  |t        |      ||||      d   ||<   nt        d| d| d      | j                   }	|	j"                  |   *t        j$                  d	| d
| j'                          d      | j)                  || j                  | j*                  | j,                  |	      | j)                  |t        j                  t        j.                        | j,                  |	      fS )Nr   zk must be positive, got z5approx_top_k operand must have >= 1 dimension, got {}z;k must be smaller than the size of reduction_dim {}, got {}zoperand must be a floating typezSapprox_top_k with aggregate_to_topk=False not yet implemented when either the `k` (z$) or the  reduction dimension size (z) are symboliczreduction dimension z in operand z@ should be unsharded i.e. the spec of that dim should be `None`.)shapedtype	weak_typevmasharding)r%   r&   r(   r)   )
ValueErrorlenr%   	TypeErrorformatlistr   
issubdtyper&   npfloatingr   is_constant_shaper   "approx_top_k_reduction_output_sizeNotImplementedErrorr)   specShardingTypeError	str_shortupdater'   r(   int32)
r   r   r   r   r   r   r   dimsreduction_input_size	operand_ss
             r   _approx_top_k_abstract_evalr=      s    !V
/s3
441
KRR  	gmm	$	
"
ELL$%q	*+ + 
		7=="++	6
6
7712 !D	3Q78 $ G Gc$iM;L%!''(!*D	 
3 '';&<N
LM M )^^'(4

 
 
23 4  !	  ..t7==#*#4#4'++"+  - ..t288BHH+=7;;"+  -
. .r!   c                 ~    t        j                  |rt         j                   |       S t         j                  |       S )N)r&   )r0   arrayinf)op_typer   s     r   _get_init_val_literalrB      s)    	X266'	AA266	AAr!   c                    t         j                  j                  g |      }t         j                  j                  g t         j                  j	                  d            }||||g}t         j                  j                  g t         j                  j	                  d            g}t         j
                  j                  ||      }t         j                  j                  | j                  j                  j                        5  t        j                  dj                  |rdnd|      |      }d d d        | j                  j                  j                         |j!                         }	t        j                  |	      5  |	j"                  \  }
}}}t$        j&                  j                  |rdnd      }t%        j(                  |
||      }t%        j*                  |g       d d d        |S # 1 sw Y   xY w# 1 sw Y   |S xY w)	N       ztop_k_{}_{}_comparatorgtltGTLT)comparison_direction)r   RankedTensorTypegetIntegerTypeget_signlessFunctionTypeInsertionPointat_block_beginmodule_contextmodulebodyr   FuncOpr-   symbol_tableinsertadd_entry_block	argumentsr   ComparisonDirectionAttrcomparereturn_)ctxrA   r   scalarindexir_typesresult_typescomparator_type
comparatorentry_blockp0p1_	direction
cmp_results                  r   _comparator_builder_mlirrj     s   ""2w/&



!
!"bnn&A&A"&E
F%feU+(%%))"bnn.I.I!.LMN,OO'',?/	''(:(:(A(A(F(FG  ''dGLJ !!((4**,+	% ((LBAq++//dKIR)DJKK	 
  
s   ?+G)<A#G5)G25G?F)fallbackc          	         | j                   sJ t        d | j                   D              sJ | j                   d   j                  }	t        |	      dk(  rt	        d|	       |	}
t        j                  | j                   d   j                        }t        j                  j                         }|dk  rt        |
      |z   }t        | ||      }t        j                  | t        j                  | j                   d   j                  t        j                         |      }t#        j$                  t        j&                  j                  t        j                   d                  }t)        | j                   d   j                  |      }t        j*                  |j-                  d            }t        j.                  |      t
        j                  j0                  j                  ||      t
        j                  j2                  j                  |      t        j.                  |      d}|r,t
        j                  j2                  j                  |      |d<   t        d	 | j4                  D              rd }nJ| j4                  D cg c]5  }t        j6                  t        j8                  | |j                              7 }}t        j:                  |      rt        j.                  |      |d
<   t        j<                  d| j4                  D cg c]  }t        j>                  |       c}||||g|j@                  jB                  g||      }|jF                  S t        jD                  | |f      \  }t        j<                  d| j4                  D cg c]  }t        j>                  |       c}|||||g|j@                  jB                  g||      }|jF                  S c c}w c c}w c c}w )Nc              3   P   K   | ]  }t        |t        j                           y wN)
isinstancer   ShapedArray).0xs     r   	<genexpr>z)_approx_top_k_lowering.<locals>.<genexpr>  s     CZ4++,Cs   $&r   z"operand must be an array, but was )	dimension )reduction_dimr   r   r   is_fallbackc              3   Z   K   | ]#  }t        j                  |j                         % y wrn   )r   r2   r%   )rq   aval_outs     r   rs   z)_approx_top_k_lowering.<locals>.<genexpr>9  s     NH			/Ns   )+top_k
ApproxTopK)ra   operandscalled_computationsbackend_configresult_shapeszstablehlo.dynamic_approx_top_k)$avals_inallr%   r+   r*   r   dtype_to_ir_typer&   r   F32TyperL   rj   iotar   rp   r0   r9   r   constantDenseElementsAttrrB   ir_constantreshapei64_attr	FloatAttrBoolAttr	avals_outshape_tensoreval_dynamic_shapeis_constant_dimcustom_callaval_to_ir_typenamevalueeval_dynamic_shape_as_valsresults)r]   r   r   r   r   r   r   r   rk   op_shapeop_dimsrA   recall_typerc   r   init_arginit_val_arrayinit_valr   r   rz   avaloutk_values                           r   _approx_top_k_loweringr     s    
	CcllC	CC	C\\!_""(]a
9(D
EE'!!#,,q/"7"78'

 +1g,)<<'Wh?*	3((a)>)>I0
2$ \\"..22288B<@A((a)>)>I.n44R89( mm$78gg''++KG''**../@A
mm124. $(GG$4$4$8$8$BN=!NNNM ' 	$11#x~~FG'M ' 
!"mmA.N7


=@]]KTd**40K484'__223%#%C" 
 ..sQD9HG


(=@]]KTd**40K48W='__223%#%C 
/' L Ls   :O	O 
O%
c          	          t        |       dk(  sJ t        |      dk(  sJ | \  }|\  }	t        |j                        D 
cg c]	  }
|
|	us|
 }}
||   }t        j	                  |||||||      |	|	ffS c c}
w )NrE   r   )r+   rangendimr   r   )batch_operands
batch_axesr   r   r   r   r   r   r   
batch_axisddim_maps               r   _approx_top_k_batch_ruler   U  s     
^		!!	!	ZA		('+*gll+C1q
/BQC'C 34				-!$A) 
 
+ .8,D
E E Ds    	A5
A5c                   | \  }|\  }	|rt        ||||||      \  }
}nt        ||||||      \  }
}t        |	      t        j                  u r t        j                  j                  |
      }n/|j                  }t        |      }|dk  r||z  }t        |	||      }|
|f|t        j                  j                  |      ffS )Nr   )axis)	r    r#   typer   Zerofrom_primal_valuer%   r+   r   )primalstangentsr   r   r   r   r   r   r   tangentval_outarg_outtangent_out	arg_shaperanks                  r   _approx_top_k_jvpr   p  s     ('('#GQ0C$1$A$57GW
 $GQ0C$1$A$57GW 
']gll",,009KIy>DQT!!'79LMK
7	k7<<+I+I'+RS	SSr!   approx_top_ktpu)platform)ru   gffffff?ru   T).__doc__	functoolsr   numpyr0   jax._srcr   r   r   r   jax._src.numpy.indexingr   jax._src.interpretersr	   r
   r   jax._src.libr   jax._src.lib.mlirr   jax._src.lib.mlir.dialectsr   r   jax._src.typingr   intfloatbooltupler    r#   r=   rB   rj   r   r   r   	Primitiver   multiple_resultsdef_implapply_primitivedef_abstract_evalregister_loweringprimitive_batchersprimitive_jvpsrv   r!   r   <module>r      s  7r       3 $ * &    + * !
 -/(,68+/8+% 8+8+&)8+ !&8+ 14	8+
 %)8+
 5:%,4G8+z -/(,68+/<+% <+<+&)<+ !&<+ 14	<+
 %)<+
 5:%,4G<+~&.PB2 ?D<|E6T4  /"&     8 8.I J     !< =   ~4tDF   ~'=!&(.F  N +$5  . !r!   