
    bix                     
   d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlZddlZddlm	Z ddlmZ dd	lmZ d
dl m!Z!m"Z" d
dl#m$Z$m%Z% d
dl&m'Z'm(Z( d
dl)m*Z+ e
rd
dl,m-Z. neZ. ej^                  d      Z0e0jc                  dd      Z2de3de3deeejh                  ejh                  f   ddf   fdZ5defdZ6defdZ7deddfdZ8e2jr                  deejh                  ejh                  f   fd       Z:e2jr                  deejh                  ejh                  f   fd       Z;e2jr                  deejh                  ejh                  f   fd       Z<e2jr                  deejh                  ejh                  f   fd       Z=e2jr                  dee.ejh                  f   fd       Z>e2jr                  d e?deej                  ejh                  ejh                  ej                  ejh                  ejh                  ej                  ejh                  ejh                  f	   fd!       ZA	 dSd"d#d$d%e3de3d&e3d'eBd(eBd)e3deeejh                     eejh                     eejh                     f   fd*ZCeej                  ej                  ej                     ej                  ej                     f   ZFe G d+ d,             ZG G d- d.e      ZH G d/ d0      ZId1ej                  ej                     deej                  ej                  ej                  f   fd2ZJ	 dTd3ej                  d4ej                  ej                     d5ej                  ej                     d6eKdej                  ej                     f
d7ZMd8eej                  ej                  ej                     ej                  ej                     f   d9ej                  ej                     deGfd:ZNd;eHdeeGeeG   f   fd<ZOd3ej                  d4ej                  ej                     d5ej                  ej                     d=ej                  ej                     d>ej                  ej                     deej                  ej                  ej                     ej                  ej                     ej                  ej                     f   fd?ZQd@edAee!   dBe?ddfdCZRe2jr                  de3de3dDeKdEeBdeeej                     ejh                  f   f
dF       ZSdGe3dHe3dee?   fdIZTdJdKd"d#ej                  dLde3de3dMe3dNeBdDeKdOeKdPeBd)e3dQej                  j                  dee'ejh                  f   fdRZVy)UzUtilities for data generation.    N)ThreadPoolExecutor)	dataclass)TYPE_CHECKINGAnyCallableDict	GeneratorList
NamedTupleOptionalSetTupleTypeUnion)request)typing)r	   )sparse   )DMatrixQuantileDMatrix)is_pd_cat_dtypepandas_pyarrow_mapper)	ArrayLike	XGBRanker)train)	DataFramejoblibz
./cachedir)verbose	n_samples
n_featuresreturnc              #     K   t        j                  d      }t        j                  j	                  d      }|j                  dd| |z        j                  | |      }t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                   t        j"                  t        j$                  t        j&                  t        j(                  t        j*                  t        j,                  t        j.                  t        j0                  t        j2                  t        j4                  g}|D ]A  }t        j6                  ||      }||f |j9                         |j9                         f C |D ]A  }t        j6                  ||      }|j;                  |      }|j;                  |      }	||	f C |j=                  dd| |z  	      j                  | |      }t        j>                  t@        fD ]  }t        j6                  ||      }||f ! t        j>                  t@        fD ]A  }t        j6                  ||      }|j;                  |      }|j;                  |      }	||	f C y
w)z*Enumerate all supported dtypes from numpy.pandas  r      lowhighsizedtype   g      ?r)   N)!pytestimportorskipnprandomRandomStaterandintreshapeint32int64byteshortintcint_longlonguint32uint64ubyteushortuintcuint	ulonglongfloat16float32float64halfsingledoublearraytolistr   binomialbool_bool)
r   r    pdrngorigdtypesr+   Xdf_origdfs
             O/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/xgboost/testing/data.py	np_dtypesrV   -   s     
		X	&B
))


%C;;13Y-C;DLL:D 	






		
		

		













		
		)F,  (HHT'AgkkmQXXZ''(
  HHT',,t$\\!_rk	 <<3Y%;<<DD:D ((D! HHT'Ag ((D! HHT',,t$\\!_rk	s   KKc            	   #     K   t        j                  d      } | j                         | j                         | j	                         | j                         | j                         | j                         | j                         | j                         g}t        j                  }| j                  dd|dgdd|dgdt        j                        }t        j                  d| j                  fD ]-  }|D ]&  }| j                  dd|dgdd|dgd|      }||f ( / t        j                  }| j                         | j!                         g}| j                  d	d
|dgdd
|d	gdt        j                        }t        j                  d| j                  fD ]m  }|D ]f  }| j                  d	d
|dgdd
|d	gd|      }||f |d   }|d   }t#        || j$                        sJ t#        || j$                        sJ ||f h o |j'                  d      }t        j                  d| j                  fD ]4  }| j                  d	d
|dgdd
|d	gd| j)                               }||f 6 d| j                  fD ]i  }dd|dgdd|dgd}| j                  ||t        j*                  n| j-                               }| j                  || j-                               }||f k yw)z/Enumerate all supported pandas extension types.r#   r,   r         f0f1r*   N      ?g       @g      @r[   categoryTF)r.   r/   
UInt8DtypeUInt16DtypeUInt32DtypeUInt64Dtype	Int8Dtype
Int16Dtype
Int32Dtype
Int64Dtyper0   nanr   rD   NAFloat32DtypeFloat64Dtype
isinstanceSeriesastypeCategoricalDtyperL   BooleanDtype)	rN   rQ   NullrP   r+   rT   ser_origserdatas	            rU   	pd_dtypesrt   h   s    			X	&B 	






	F %'FFD<<1dAq!T1o6bjj  D ruu%  	E1dAq!T1o>e  B (N		 66Doo!23F<<S$$S#tS,AB"**  D ruu% 
  		 ES$,S#tS4IJRW  B (NDzHT(Cc299---h		222C-		 
  ;;z"Druu% \\dC(c40EF%%'  
 Bh ruu UD$/tT47PQ||DDLbooFW|X\\$boo&7\8Bhs   K"K$c            	   #     K   t        j                  d      } t        j                  d      }t        }d| j                  dfD ]  }|D ]  }|j	                  d      s|j	                  d      r&| j                  |      s|dk(  r|nt        j                  }| j                  dd|d	gd
d	|dgdt        j                        }| j                  dd|d	gd
d	|dgd|      }||f   | j                  dfD ]o  }| j                  dd|dgdd|dgd| j                               }| j                  dd|dgdd|dgd| j                  |j                                     }||f q yw)z*Pandas DataFrame with pyarrow backed type.r#   pyarrowNr   rC   rM   r,   r   rX   rY   rZ   r*   FT)r.   r/   r   rh   
startswithisnar0   rg   r   rD   ro   
ArrowDtyperL   )rN   parQ   rp   r+   	orig_nullrP   rT   s           rU   pd_arrow_dtypesr|      s    			X	&B			Y	'B #F. ruua   	E	*e.>.>v.F$&GGDMdaiRVVI<<1i+Aq)Q3GHjj   D
 1dAq!T1o>e  B (N	"  	||%t,UD$4MN//#  
 \\%t,UD$4MN--
+  
 Bh	s   E(E*rO   c                    | j                  d      j                  dd      }| j                  d      }t        j                  |d<   t	        j
                  t        d      5  t        ||       ddd       t	        j
                  t        d      5  t        ||       ddd       y# 1 sw Y   :xY w# 1 sw Y   yxY w)	zValidate there's no inf in X.    r-      rY   )   r   zInput data contains `inf`matchN)	r1   r4   r0   infr.   raises
ValueErrorr   r   )rO   rR   ys      rU   	check_infr      s    


##Aq)A


AffAdG	z)D	E 1 
z)D	E 1   s   #B)B5)B25B>c                  |    t        j                  d      } | j                         }|j                  |j                  fS )z2Fetch the California housing dataset from sklearn.sklearn.datasets)r.   r/   fetch_california_housingrs   targetdatasetsrs   s     rU   get_california_housingr      s6     ""#56H,,.D99dkk!!    c                  |    t        j                  d      } | j                         }|j                  |j                  fS )z&Fetch the digits dataset from sklearn.r   )r.   r/   load_digitsrs   r   r   s     rU   
get_digitsr      s6     ""#56H!D99dkk!!r   c                  P    t        j                  d      } | j                  d      S )z-Fetch the breast cancer dataset from sklearn.r   T)
return_X_y)r.   r/   load_breast_cancer)r   s    rU   
get_cancerr      s)     ""#56H&&$&77r   c                     t        j                  d      } t        j                  j	                  d      }d}d}| j                  ||      \  }}|j                  d||j                        }t        |j                  d         D ]<  }t        |j                  d         D ]  }|||f   st        j                  |||f<   ! > ||fS )zGenerate a sparse dataset.r      i  g      ?)random_stater,   r   )
r.   r/   r0   r1   r2   make_regressionrK   shaperangerg   )	r   rO   nsparsityrR   r   flagijs	            rU   
get_sparser     s     ""#56H
))


$CAH##AC#8DAq<<8QWW-D1771: !qwwqz" 	!AAqDz&&!Q$	!! a4Kr   c                     t         rddlnt        j                  d      t        j
                  j                  d      dj                         } dt        t        t        t        f   t        f   dt        dj                  ffd	} |d
dddddd      | d<    |ddddd      | d<    |dddddd      | d<    |ddd d!d"d#d$d%d      | d&<    |d'd(d)d!d*d+      | d,<    |d-d(d.d/d0d"d1d2d3d      | d4<    |d5d6d7d8d9d:d;      | d<<    |d=d>d?d@d$dAd      | dB<    |dCdDdd"dEd      | dF<    |d@dGdGdHdI      | dJ<   dKt        dLt        dt        dj                  ffdM} |dNdOd      | dP<    |dQdRd      | dS<    |dTdUd      | dV<    |dWdXd      | dY<    |dZd[d      | d\<    |d]d^d      | d_<    |d`dad      | db<    |dcddd      | de<    |dfdgd      | dh<    |didjd      | dk<   t        | j                        }j                  |       | |   } t	        j                   fl      }| j                  D ]q  }t#        | |   j$                  j&                        r:|| |   j(                  j*                  j-                  t        j.                        z  }`|| |   j0                  z  }s |dm|j3                         z  z  }|dn|j5                         z
  z  }| |fS )oam  Get a synthetic version of the amse housing dataset.

    The real one can be obtained via:

    .. code-block::

        from sklearn import datasets

        datasets.fetch_openml(data_id=42165, as_frame=True, return_X_y=True)

    Number of samples: 1460
    Number of features: 20
    Number of categorical features: 10
    Number of numerical features: 10
    r   Nr#   r$   i  
name_probadensityr!   c           	         t        	d|z
  z        }t        j                  d|z
        dkD  xr |dkD  }|rd|z
  }|| t        j                  <   t	        | j                               }t	        | j                               }|dxx   dt        j                  |      z
  z  cc<   j                  |	|      }
j                  |
j                  t        d |                  }|S )	Nr,   r]   ư>r   )r)   pc                 "    t        | t              S N)rk   str)xs    rU   <lambda>z5get_ames_housing.<locals>.synth_cat.<locals>.<lambda>B  s    As!3 r   r*   )intr0   absrg   listkeysvaluessumchoicerl   rn   filter)r   r   n_nullshas_nanr   r   r   r   seriesr   rN   rO   s            rU   	synth_catz#get_ames_housing.<locals>.synth_cat0  s     i1w;/0&&w'$.>7Q;W}H!)JrvvJOO%&""$%	"rvvay JJt)qJ1%%3T:  
 r   gqu ]?gqh.?gsmB<?g5C(?goEb?)1Fam2fmConDuplexTwnhsTwnhsEr]   BldgTypegwD?g. ҥ?g)$;?)UnfRFnFing_9?GarageFinishgW歺?gbFx{?gbFx{?gQfL2rf?)CornerCulDSacFR2FR3	LotConfigg?g/ؗ?gf׽?g$A
?g5e?g() l?g[iF?)TypMin2Min1ModMaj1Maj2Sev
Functionalg M?g?gMq?)NoneBrkFaceStoneBrkCmng3f?
MasVnrTypeg3f?gI/j ?g,	PS˦?ge@?gQ~?gZ	%qv?)1Story2Storyz1.5FinSLvlSFoyerz1.5Unfz2.5Unfz2.5Fin
HouseStyleg$	P?gHp?gK$?gՐ?g4*p?)GdTAFaExPogE`o?FireplaceQugș&l??皙?g5e?gunڌ`?)r   r   r   r   r   	ExterCondgn0a?g{gUId?)r   r   r   r   	ExterQualg8 nV?)r   r   r   g(xߢs?PoolQClocstdc                    j                  | |      }t        d|z
  z        }t        j                  d|z
        dkD  r,|dkD  r'j	                  |d      }t        j
                  ||<   j                  |t        j                        S )	Nr   scaler)   r,   r]   r   r   Fr)   replacer*   )normalr   r0   r   r   rg   rl   rE   )	r   r   r   r   r   null_idxr   rN   rO   s	         rU   	synth_numz#get_ames_housing.<locals>.synth_num  s}    JJ3c	J:i1w;/066#- 4'GaKzz)'5zIH&&AhKyy"**y--r   gmtF@gOfK<Q=@	3SsnPorchgݹsΝ?g2Tf?
FireplacesgR u?gP$[r?BsmtHalfBathgvS?g_-?HalfBathgbĈ#F?g+?
GarageCarsg$[Q<@g"$#e?TotRmsAbvGrdg$[Q<{@g%Ǒ|@
BsmtFinSF1ge0OFG@g*Ӛ{7*d@
BsmtFinSF2gNڭ@gCk@	GrLivAreagg6.@gK@ScreenPorchr   g(e@g.A)r   r#   r.   r/   r0   r1   default_rngr   r   r   r   floatrl   r   columnsshufflezerosrk   r+   rn   catcodesrm   rE   r   r   mean)	rT   r   r   r   r   cr   rN   rO   s	         @@@rU   get_ames_housingr    s   "   *
))


%CI	BsEz*E12=B	. 	
 		BzN #(;WB~  		
 	B{O !	
 	B| !		
 	B| !		
 	B| "	
 		B}  	
 		B{O  		
 	B{O 	

 	BxL.u .5 .5 .RYY .   24EsKB{O !24FLB|"#79LcRB~24FLBzN !35GMB|"#46H#NB~ !24EsKB| !24FLB| 13DcJB{O!"46H#NB}2::GKK	GB 		|$AZZ bekk2#6#67A''

33AAA	 	QUUW	$$A	affh	&&Aq5Lr   dpathc           	      b   t        j                  d      }d}t        j                  j	                  | d      }t        j                  j                  |      st        j                  ||       t        j                  |d      5 }|j                  |        ddd       |j                  t        j                  j	                  | d      t        j                  j	                  | d	      t        j                  j	                  | d
      fdd      \	  }}}}}	}
}}}|||||	|
|||f	S # 1 sw Y   xY w)zFetch the mq2008 dataset.r   z>https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zipz
MQ2008.zip)urlfilenamer)pathNzMQ2008/Fold1/train.txtzMQ2008/Fold1/test.txtzMQ2008/Fold1/vali.txtTF)query_id
zero_based)r.   r/   osr
  joinexistsr   urlretrievezipfileZipFile
extractallload_svmlight_files)r  r   srcr   fx_trainy_train	qid_trainx_testy_testqid_testx_validy_valid	qid_valids                 rU   
get_mq2008r     s#    ""#56H
JCWW\\%.F77>>&!f5		% !	% ! 	$$GGLL 89GGLL 78GGLL 78	

  	% 	
 	
 
/! !s   D%%D.Fr$   )	vary_sizer   n_samples_per_batch	n_batchesuse_cupyr!  r   c                   g }g }g }|r3ddl }	|	j                  j                  t        j                  |            }
nt        j                  j                  |      }
t        |      D ]x  }|r| |dz  z   n| }|
j                  ||      }|
j                  |      }|
j                  dd|      }|j                  |       |j                  |       |j                  |       z |||fS )zMake batches of dense data.r   N
   r,   r&   )	cupyr1   r2   r0   r=   r   randnuniformappend)r"  r    r#  r$  r!  r   rR   r   wr'  rO   r   r   _X_y_ws                   rU   make_batchesr/    s     	A
A
Akk%%bii&=>ii##L19 4='!b&0CV	YYy*-YYy![[QQY[7			 a7Nr   c                   H   e Zd ZU dZej
                  ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   y)		ClickFoldzCA structure containing information about generated user-click data.rR   r   qidscoreclickposN)__name__
__module____qualname____doc__r   
csr_matrix__annotations__nptNDArrayr0   r5   rD   r6    r   rU   r1  r1  '  sp    M
{{288	RXX	;;rzz"";;rxx  	RXX	r   r1  c                   <    e Zd ZU dZeed<   eed<   eed<   defdZy)	RelDataCVzPSimple data struct for holding a train-test split of a learning to rank dataset.r   testmax_relr!   c                      | j                   dk(  S )z6Whether the label consists of binary relevance degree.r,   )rB  )selfs    rU   	is_binaryzRelDataCV.is_binary:  s    ||q  r   N)	r6  r7  r8  r9  RelDatar;  r   rM   rE  r>  r   rU   r@  r@  3  s     ZN
ML!4 !r   r@  c                       e Zd ZdZdeddfdZdej                  ej                     dej                  ej                     dej                  ej                     fdZy)	PBMa  Simulate click data with position bias model. There are other models available in
    `ULTRA <https://github.com/ULTR-Community/ULTRA.git>`_ like the cascading model.

    References
    ----------
    Unbiased LambdaMART: An Unbiased Pairwise Learning-to-Rank Algorithm

    etar!   Nc                     t        j                  g d      | _        t        j                  g d      }t        j                  ||      | _        y )N)r   g{Gz?Q?gp=
ף?r]   )
g(\?gQ?gQ?g(\?rK  皙?g)\(?r   g{Gz?gQ?)r0   rI   
click_probpower	exam_prob)rD  rI  rO  s      rU   __init__zPBM.__init__I  s8    ((#?@HHH
	 )S1r   labelspositionc                    t        j                  |d      }t        j                  |j                        }d||dk  <   d||t	        | j
                        k\  <   | j
                  |   }t        j                  |j                        }|j                  |j                  k(  sJ t        j                  |d      }d||| j                  j                  k\  <   | j                  |   }t         j                  j                  d      }|j                  |j                  d   t         j                        }t        j                  |j                  t         j                        }d||||z  k  <   |S )	zSample clicks for one query based on input relevance degree and position.

        Parameters
        ----------

        labels :
            relevance_degree

        T)copyr   r   r$   )r)   r+   r*   r,   )r0   rI   r   r   lenrM  r)   rO  r1   r   rD   r5   )	rD  rQ  rR  rM  rO  ranksrO   probclickss	            rU   sample_clicks_for_queryzPBM.sample_clicks_for_queryR  s    &t,XXfll+
vz13vT__--.__V,
HHV\\*	}}+++-.0et~~***+NN5)	ii##D)zzv||AbjjzA(*RXX(N01ti*,,-r   )r6  r7  r8  r9  r   rP  r<  r=  r0   r5   r6   rY  r>  r   rU   rH  rH  ?  s^    2E 2d 2!kk"((+!7:{{2887L!	RXX	!r   rH  r   c           
         t        j                  |       } | j                  }t         j                  dt        j                  t        j
                  | dd | dd d             dz   f   }t        j                  t         j                  ||f         }| |   }t        j                  |t        j                  | j                  g            }|||fS )zzRun length encoding using numpy, modified from:
    https://gist.github.com/nvictus/66627b580c13068589957d6ab0919e66

    r   r,   Nr   T)	equal_nan)	r0   asarrayr)   r_flatnonzeroisclosediffr*  rI   )r   r   startslengthsr   indptrs         rU   rlencoderd  v  s    
 	

1A	AUU1bnnbjj12#2$&O%OPSTTTUFggbeeFAI&'GvYFYYvrxx12F7F""r   rR   r   r2  sample_ratec                    t         j                  j                  d      }t        | j                  d   |z        }t        j
                  d| j                  d   t         j                        }|j                  |       |d| }| |   }||   }||   }	t        j                  |	      }
||
   }||
   }|	|
   }	t        dd      }|j                  |||	       |j                  |       }|S )	zWe use XGBoost to generate the initial score instead of SVMRank for
    simplicity. Sample rate is set to 0.1 by default so that we can test with small
    datasets.

    r$   r   r*   Nz	rank:ndcghist)	objectivetree_method)r2  )r0   r1   r   r   r   aranger=   r   argsortr   fitpredict)rR   r   r2  re  rO   r   indexX_trainr  r  
sorted_idxltrscoress                rU   init_rank_scorers    s     ))


%CAGGAJ,-IIIa2995EKK*9EhGhGE
I I&Jj!Gj!G*%I
kv
>CGGGW)G, [[^FMr   foldscores_foldc                 4   | \  }}}|j                   t        j                  k(  sJ t        j                  |      }t        j                  |j
                  ft        j                        }t        j                  |j
                  ft        j                        }t        d      }|D ]f  }	|	|k(  }
|
j                  |
j                  d         }
||
   }t        j                  |      ddd   }|||
<   ||
   }|j                  ||      }|||
<   h |j                  d   |j                  d   k(  sJ |j                  |j                  f       |j                  d   |j                  d   k(  sJ |j                  |j                  f       t        ||||||      S )zSimulate clicks for one fold.r*   r]   )rI  r   Nr   )r+   r0   r5   uniqueemptyr)   r6   rH  r4   r   rk  rY  r1  )rt  ru  X_foldy_foldqid_foldqidsrR  rX  pbmqqid_maskquery_scoresquery_positionrelevance_degreesquery_clickss                  rU   simulate_one_foldr    so   
  $FFH>>RXX%%%99XDxxbhh7HXXv{{nBHH5F
#,C  
(=##HNN1$56"8,L1$B$7+"8,223DnU'x
( <<?hnnQ//O&,,1OO/<<?fll1o-Kfll/KK-VVX{FHMMr   cv_datac           	      d   t        t        | j                  | j                              \  }}}t	        j
                  dg|D cg c]  }|j                  d    c}z         }t	        j                  |      }t        |      dk(  sJ t        j                  |      }t	        j                  |      }t	        j                  |      }t        |||      }	t        d|j                        D 
cg c]  }
|	||
dz
     ||
     }}
g g g g g g f\  t        |j                  dz
        D ]  }
t        ||
   ||
   ||
   f||
         }j!                  |j"                         j!                  |j$                         j!                  |j&                         j!                  |j(                         j!                  |j*                         j!                  |j,                          t        |j                  dz
        D 
cg c]  }
|
   	 }}
t        d      D ]  }
||
   ||
   k(  j/                         rJ  t              dk(  r(t1        d   d   d   d   d   d         }d}||fS fdt        t                    D        \  }}||fS c c}w c c}
w c c}
w )z6Simulate click data using position biased model (PBM).r   rX   r,   r   Nc           
   3   b   K   | ]&  }t        |   |   |   |   |   |          ( y wr   )r1  ).0r   X_lstc_lstp_lstq_lsts_lsty_lsts     rU   	<genexpr>z"simulate_clicks.<locals>.<genexpr>  s@      
 eAha%(E!HeAhaQ
s   ,/)r   zipr   rA  r0   rI   r   cumsumrU  r   vstackconcatenaters  r   r)   r  r*  rR   r   r2  r3  r4  r5  allr1  )r  rR   r   r2  vrc  X_fully_fullqid_fullscores_fullr   rr  rt  scores_check_1r   rA  r  r  r  r  r  r  s                   @@@@@@rU   simulate_clicksr    sx   S56IAq# XXqc3AQWWQZ334FYYvFv;%]]1F^^AF~~c"H "&&(;K>CAv{{>STk&Q-&)4TFT/12r2r2/E,E5%u6;;?#  !A$!c!f!5vayATVVTVVTXXTZZ TZZ TXX ).fkkAo(>?1eAh?N?1X 6q!VAY.335556 5zQ%(E!HeAha%(ERSHU $;	
 
3u:&
t $;G 4 U @s   	J#
1J(J-rX  r5  c           
         t        j                  |      }| |   } ||   }||   }||   }t        |      \  }}}t        d|j                        D ]  }||dz
     }	||   }
|	|
k  s	J |	|
f       t        j
                  ||	|
       j                  dk(  s	J |	|
f       ||	|
 }|j                         dk(  sJ |j                                |j                         |j                  dz
  k\  s9J |j                         |j                  |t        j
                  ||	|
       f       t        j                  |      }| |	|
 |   | |	|
 ||	|
 |   ||	|
 ||	|
 |   ||	|
 ||	|
 |   ||	|
  | |||f}|S )z,Sort data based on query index and position.r,   r   )r0   rk  rd  r   r)   rw  minmax)rR   r   r2  rX  r5  rp  rc  _r   begend	query_posrs   s                rU   sort_ltr_samplesr    s    CJ	*AJF
j/C
j/CC=LFAq1fkk" 0QUmQiSy$3*$yyyS&++q0<3*<0CL	}}!#4Y]]_4#}})..1"44 	
MMONNIIc#cl#	7
 	
4 ZZ	*
s3Z
+#c
 S/*5s3s3Z
+#c
3s|J/C+0. faDKr   DTypeDMatrixTdevicec                    t         j                  j                         } | |j                  ddd      j	                  t         j
                        j                  dd            }t        |d      r|j                  dddf   }n	|dddf   }|} ||||	      }t        j                  t        d
      5  t        d|d|       ddd       t        |d      s | |j                         j                  dd            }||k(  j                         sJ |j                  j                   j"                  du sJ |j                  j                   j$                  du sJ |j'                  |j                  	        | |j                         j                  dd            }||j                  k(  j                         sJ |}|j)                  |       |j                         }	|j)                  |j                  d|j*                               |j                         }
|
|	k(  j                         sJ |j	                  t         j,                        }|j)                  |       |j                         }||	k(  j                         sJ |j                  dddd      }t        j                  t        d
      5  |j)                  |       ddd       yy# 1 sw Y   xY w# 1 sw Y   yxY w)zRun tests for base margin.r   r]   d   r-   2   r   ilocN)base_marginz.*base_margin.*r   rg  )ri  r  FTr,   r   )r0   r1   r   r   rm   rD   r4   hasattrr  r.   r   r   train_fnget_base_marginr  Tflagsc_contiguousf_contiguousset_infoset_base_marginr)   rE   )r  r  r  rO   rR   r   r  Xygotbm_colbm_rowbm_f64s               rU   run_base_margin_infor  (  s   
))


!CcjjCcj*11"**=EEb!LMAq&FF1a4LadGK	!QK	0B	z);	< @6:B?@ 1fB&&(00Q78{"'')))}}""//5888}}""//4777
.B&&(00B78{}}$))+++ 
;'##%
;..q+2B2BCD##%& %%''' "((4
;'##%& %%''' ii1a+]]:-?@ 	,{+	, 	,7 @ @<	, 	,s   7K7K K K)r   as_densec                     t        t        j                  d      sTt        j                  j                  d      }t	        j                   dz
  |d      }|j                  dd       }||fS t        t        j                               dt        d	t        j                  f fd
}g }t              5 }	t              D ]#  }
|j                  |	j                  ||
             % 	 ddd       g }g }|D ]7  }|j                         \  }}|j                  |       |j                  |       9 t!        |      k(  sJ t	        j"                  |d      }t        j$                  |      }|j'                  |j(                  d   |j(                  d   f      j*                  }t        j,                  |d      }|j(                  d    k(  sJ |j(                  d   k(  sJ |j(                  d    k(  sJ |rR|j/                         }|j(                  d    k(  sJ |j(                  d   k(  sJ t        j0                  ||dk(  <   ||fS ||fS # 1 sw Y   hxY w)zMake sparse matrix.

    Parameters
    ----------

    as_dense:

      Return the matrix as np.ndarray with missing values filled by NaN

    r   r$   r]   csr)mr   r   r   format        r   t_idr!   c                    t         j                  j                  d| z        }
z  }| 
dz
  k(  r	| |z  z
  }n|}t        j                  	|dz
  |      j	                         }t        j
                  	df      }t        |j                  d         D ][  }|j                  |dz      |j                  |   z
  }|dk7  s+||d d |f   j                         |j                  	df      z  dz  z  }] ||fS )Nr$   r,   r]   )r  r   r   r   r   rL  )
r0   r1   r   r   tocscr   r   r   rc  toarray)r  rO   thread_sizen_features_tlocrR   r   r   r)   r    r   	n_threadsr   s           rU   
random_cscz*make_sparse_regression.<locals>.random_cscu  s   ii##D4K0 I-9q= (4++==O)OMM(N	

 %' 	
 HHi^$qwwqz" 	JA88AE?QXXa[0DqyQq!tW__&YN)CCcII	J
 !tr   )max_workersN)r  r   r,   )axis)r  r0   r1   r2   r   r   r  multiprocessing	cpu_countr   
csc_matrixr   r   r*  submitresultrU  hstackr\  r4   r   r  r   r  rg   )r   r    r   r  rO   rR   r   r  futuresexecutorr   	X_results	y_resultsr  r  arrr  s   ```             @rU   make_sparse_regressionr  W  s=    299m,ii##D)MM(N
 JJ3c	J:!t O--/<I !2!2  . G			2 ;hy! 	;ANN8??:q9:	;; II xxz1
 y>Y&&&#]]9UCC


9A			1771:qwwqz*+--A
qqA99Q<9$$$99Q<:%%%771:"""kkmyy|y(((yy|z)))C1HAv6M;; ;s   >2II!	n_stringsseedc                 >   d}t               }t        j                  j                  |      }t	        |      | k  rZdj                  |j                  t        t        j                        |d            }|j                  |       t	        |      | k  rZt        |      S )zGenerate n unique strings.r    Tr   )setr0   r1   r   rU  r  r   r   stringascii_lettersadd)r  r  name_lenunique_stringsrO   
random_strs         rU   unique_random_stringsr    s    H"uN
))


%C
n
	
)WWJJtF001$JO

 	:&	 n
	
) r   r  r]   )r   	cat_ratior   r   	cat_dtypen_categoriesonehotr  r   r  c          	      "   t        j                  d      }	t        j                  j	                  |      }
|	j                         }t        |      D ]1  }|
j                  d|d      d   }|dk(  rt        j                  |t        j                        r4t        j                  t        ||            }|
j                  || d      }n*t        j                  d|      }|
j                  d||       }|	j                  |d	      |t!        |      <   |t!        |         j"                  j%                  |      |t!        |      <   |
j                  d||       }|	j                  ||j&                  	      |t!        |      <   4 t        j(                  | f
      }|j*                  D ]J  }t-        ||   j&                  |	j.                        r|||   j"                  j0                  z  }C|||   z  }L |dz  }|dkD  rt        |      D ]  }|
j                  d| dz
  t3        | |z              }t        j4                  |j6                  ||f<   t9        |j:                  j6                  |         sh|t        j<                  |j:                  j6                  |   j>                        j@                  k(  rJ  |jB                  d   |k(  sJ |r|	jE                  |      }|r+tG        |j*                        }|
jI                  |       ||   }||fS )a/  Generate categorical features for test.

    Parameters
    ----------
    n_categories:
        Number of categories for categorical features.
    onehot:
        Should we apply one-hot encoding to the data?
    sparsity:
        The ratio of the amount of missing values over the number of all entries.
    cat_ratio:
        The ratio of features that are categorical.
    shuffle:
        Whether we should shuffle the columns.
    cat_dtype :
        The dtype for categorical features, might be string or numeric.

    Returns
    -------
    X, y
    r#   r,   r-   r   Tr   r&   r^   r*   r   r  )%r.   r/   r0   r1   r2   r   r   rK   
issubdtypestr_rI   r  r   rj  r3   rl   r   r   set_categoriesr+   r   r   rk   rn   r  r   rg   r  r   rQ   rw  
categoriesr)   r   get_dummiesr   r   )r   r    r  r  r   r  r   r   r  rN   rO   rT   r   r   r  r  numlabelcolrn  r   s                        rU   make_categoricalr    s   B 
		X	&B
))


-C	B: 9a3A6Q;}}Y0XX&;L!&LM
JJz	4JHYYq,7
KKALyKI1J7Bs1vJCF66zBBs1vJ++!,Y+GC3cii8Bs1vJ9  HHI<(Ezz bgmmR%8%89RW[[&&&ERWE	
 
QJE#~z" 	TAKKIMI4H0I   E !#BGGE1Hryy~~a01#ryy1B1M1M'N'S'SSSS	T 88A;*$$$^^Brzz"G[u9r   )F)r   )Wr9  r  r  r  r  concurrent.futuresr   dataclassesr   r   r   r   r   r   r	   r
   r   r   r   r   r   r   urllibr   numpyr0   r.   r<  numpy.randomRNGscipyr   corer   r   rs   r   r   sklearnr   r   trainingr   r  compatr   
DataFrameTr/   r   Memorymemoryr   ndarrayrV   rt   r|   r   cacher   r   r   r   r  r   r:  r   rM   r/  r=  r5   rF  r1  r@  rH  rd  r   rD   rs  r  r  r6   r  r  r  r  	DTypeLiker  r>  r   rU   <module>r     sR   $  	   1 !        )  + 9 * (0J			X	&	|Q	/88 #8uRZZ+,dD898v<9 <~7 7t
3 
4 
 "bjj"**&< = " " "E"**bjj01 " " 8E"**bjj01 8 8 E"**bjj01    s%
BJJ 67 s sl 55

JJJJ
JJJJ
JJJJ
5 5x 	   	   4

T"**-tBJJ/??@< !!3;;rxx#8#++bhh:OO
P   	!
 	!4 4n#BHH% #%S[[#++0U*V #& 	!!
{{288! 
RXX	! 	!
 	[[!HN
!!3;;rxx#8#++bhh:OO
PNRZZ(N NF(Y (5HY<O1O+P (V..
{{288. 
RXX	. KK!	.
 
RXX	. 
KKKKKK.b+, +,DM +,3 +,SW +,^ RR #R/4R@DR
5""#RZZ/0R Rj S    S	  , %'XXPPP P
 P P P P P yy""P 9bjj !Pr   