
    bi7                    
   U d Z ddlZddlmZ ddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlZddlZddl Z ddlm!Z" dd	lm#Z$ dd
lm%Z& ddl'm(Z( ddlm)Z) ddl*m+Z+ ddl*m,Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3m4Z4 ddl5m6Z6 ddl/m7Z8 ddl/m9Z: ddl/m;Z< ddl=m>Z>m?Z? ddl@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZH ddlImJZJmKZK ddlLmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZX ddlYmZZZ ddl[m\Z] ddlIm^Z^m_Z_m`Z` ddlambZbmcZc ee"j                  e&j|                  e&j                  f   Zfeegd<   ee"j                  e&j|                  f   Zheegd <    ed!eAed"      Zig d#Zj ej                  d$      Zled%e+fd&       Zmed%enfd'       Zoed%enfd(       Zpd)eqd*eeeer   eeereqf      f      d+eeq   d%e:fd,Zsd)eqd-eer   d.eeereqf      d+eeq   d%e:f
d/Zt G d0 d1e/j                        Zud2ed3   d%d3fd4Zv G d5 d6      Zw ed7      Zx ed8      Zyd2ed3   d9eeyexf   d:ed;eer   d%exf
d<Zz G d= d>ew      Z{d?end@ed%eBfdAZ|	 	 dd2d3d)eqdBeeeref      dCee8   d%eereereqf   f   f
dDZ}d%eeeref      fdEZ~dFewdGeeeewerf         d%eer   fdHZd;eer   d2d3d%dfdIZdJedKeqd:edLeeq   dMeer   dNeqd%eeBeeeBerf      f   fdOZd2d3dPeeref   dBeeeref      dQeeref   dFewdReqdGeeeewerf         dSeeD   dTeeq   dUeeqenf   dVeeA   dWeee6      dXeeC   dCee8   d%eei   fdYZeG	 ddddddZdddd[d2d3dQeeref   dFewdReqdGeeeewerf         dSeeD   dTeeq   dVeeA   dUeeqenf   dWeee6      dXeeC   dCee8   d%efd\       Z\d]end^ed%enfd_Zd`edaedbeeq   d]end%ef
dcZddededfd`ehdgeef   d^eeqdhf   dieeqerf   d%effdjZdeeAdkeqd]endlend@ed%eeeqdhf   eeqerf   f   fdmZd2d3dneeAedff   d%dffdoZd2d3dPeeref   dneeAedff   d`ehdpendqedrendsendtenduendvendwe4dxend%effdyZeGdze j                  dzdzdzdzdZd{dzd|	d2ed3   dneeieAdff   d`eewehf   dpendqedrendsendtenduendvendwe4dxend%efd}       Zd2d3dPeeref   dneeAedff   d`ehdwe4d~erdqedvendgeef   dxend%effdZeGd{de j                  dZddzdd2ed3   dneeieAdff   d`ehdwe4d~erdqedvendgeef   dxend%efd       Zd2ed3   deer   deer   deeq   d@ed%eeweeeewerf         f   fdZedndd2d3d%efd       Z G d deO      Z eXdddng       G d deRe             Z eXdddng       G d deNe             Z eXdddngdd       G d deQe             Z eXddndgd       G d de             Z eXddndgd       G d de             Zy)aS  
Dask extensions for distributed training
----------------------------------------

See :doc:`Distributed XGBoost with Dask </tutorials/dask>` for simple tutorial.  Also
:doc:`/python/dask-examples/index` for some examples.

There are two sets of APIs in this module, one is the functional API including
``train`` and ``predict`` methods.  Another is stateful Scikit-Learner wrapper
inherited from single-node Scikit-Learn interface.

The implementation is heavily influenced by dask_xgboost:
https://github.com/dask/dask-xgboost

Optional dask configuration
===========================

- **coll_cfg**:
    Specify the scheduler address along with communicator configurations. This can be
    used as a replacement of the existing global Dask configuration
    `xgboost.scheduler_address` (see below). See :ref:`tracker-ip` for more info. The
    `tracker_host_ip` should specify the IP address of the Dask scheduler node.

  .. versionadded:: 3.0.0

  .. code-block:: python

    from xgboost import dask as dxgb
    from xgboost.collective import Config

    coll_cfg = Config(
        retry=1, timeout=20, tracker_host_ip="10.23.170.98", tracker_port=0
    )

    clf = dxgb.DaskXGBClassifier(coll_cfg=coll_cfg)
    # or
    dxgb.train(client, {}, Xy, num_boost_round=10, coll_cfg=coll_cfg)

- **xgboost.scheduler_address**: Specify the scheduler address

  .. versionadded:: 1.6.0

  .. deprecated:: 3.0.0

  .. code-block:: python

      dask.config.set({"xgboost.scheduler_address": "192.0.0.100"})
      # We can also specify the port.
      dask.config.set({"xgboost.scheduler_address": "192.0.0.100:12345"})

    N)defaultdict)contextmanager)cachepartialupdate_wrapper)Thread)Any	AwaitableCallableDict	GeneratorIterableListOptional	ParamSpecSequenceSetTuple	TypeAlias	TypedDict	TypeGuardTypeVarUnion)array)bag)	dataframe)Delayed)Future)Version)parse   )
collectiveconfig)FeatureNamesFeatureTypesIterationRange)TrainingCallback)Config)_Args)_ArgVals)	DataFramelazy_isinstance)BoosterDMatrixMetric	ObjectiveXGBoostError_check_distributed_params_deprecate_positional_args_expect)_is_cudf_ser_is_cupy_alike)XGBClassifierXGBClassifierBaseXGBModel	XGBRankerXGBRankerMixInXGBRegressorBase_can_use_qdm_check_rf_callback_cls_predict_proba_objective_decorator_wrap_evaluation_matricesxgboost_model_doc)RabitTracker)train   )_create_dmatrix_create_quantile_dmatrixno_group_split)get_address_from_userget_n_threads_DaskCollection_DataTTrainReturnTboosterhistory)CommunicatorContextDaskDMatrixDaskQuantileDMatrixDaskXGBRegressorDaskXGBClassifierDaskXGBRankerDaskXGBRFRegressorDaskXGBRFClassifierrD   predictinplace_predictz[xgboost.dask]returnc                  4    t        t        j                        S N)parse_versiondask__version__     P/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/xgboost/dask/__init__.py_DASK_VERSIONrd      s    ))**rb   c                  .    t               t        d      k\  S )Nz	2024.12.1rd   r^   ra   rb   rc   _DASK_2024_12_1rg      s    ?mK888rb   c                  .    t               t        d      k\  S )Nz2025.3.0rf   ra   rb   rc   _DASK_2025_3_0ri      s    ?mJ777rb   	n_workersaddrstimeoutc           	      j   i }	 t        |d   t              r%|d   d   }|d   d   }t        | ||d|dn|      }n,|d   }t        |t              s|J t        | |d|dn|      }|j	                          t        |j                        }d|_        |j	                          |j                  |j                                |S # t        $ rc}	t        |      dk  r t        j                  d	t        |d         t        |d         t        |	             t        | |dd  |      }Y d }	~	|S d }	~	ww xY w)
Nr   rE   task)rj   host_ipportsortbyrl   )rj   ro   rq   rl   )targetTr!   zCFailed to bind address '%s', trying to use '%s' instead. Error:
 %s)
isinstancetuplerC   strstartr   wait_fordaemonupdateworker_argsr1   lenLOGGERwarning_try_start_tracker)
rj   rk   rl   envro   rp   rabit_trackeraddrthreades
             rc   r~   r~      sD   
 C%@eAh&AhqkG8A;D(#$_'M 8DdC(DL88(#$_'	M 	}556

=,,./ J  	@u:>RaMaMF		
 !E!"Iw?J	@s   C C 	D2AD--D2addr_from_daskaddr_from_userc                 $    t        | ||g|      }|S )z8Start Rabit tracker, recurse to try different addresses.)r~   )rj   r   r   rl   r   s        rc   _start_trackerr      s     Y(H'
RCJrb   c                   ,     e Zd ZdZdeddf fdZ xZS )rQ   zNA context controlling collective communicator initialization and finalization.argsr[   Nc                     t        |   di | t        j                         }d|j                   d|j
                   | j                  d<   y )Nz[xgboost.dask-z]:DMLC_TASK_IDra   )super__init__distributed
get_workernameaddressr   )selfr   worker	__class__s      rc   r   zCommunicatorContext.__init__   sG     4 '')
 '5V[[MFNNCS$T		.!rb   )__name__
__module____qualname____doc__CollArgsValsr   __classcell__r   s   @rc   rQ   rQ      s"    XU| U U Urb   rQ   clientdistributed.Clientc                 &   t        | t        t        j                               t        d      f      sDt	        t        t        t        j                               t        d      gt        |                   | t        j                         }|S | }|S )z#Simple wrapper around testing None.N)rs   typer   
get_client	TypeErrorr4   )r   rets     rc   _get_clientr     s{    ftK$:$:$<=tDzJKT+0023T$Z@$v,O
 	
 '-n+
 
 
"CJ ;ACJrb   c            #       h   e Zd ZdZe	 ddddddddddddddded   dedee   d	ee   d
ee   dee   de	dee
   dee   dee   dee   dee   dee   dee   de	ddf d       ZdefdZdddddddddddedee   dee   d
ee   dee   dee   dee   dee   dd fdZdedeeef   fdZdefdZy)rR   aq  DMatrix holding on references to Dask DataFrame or Dask Array.  Constructing a
    `DaskDMatrix` forces all lazy computation to be carried out.  Wait for the input
    data explicitly if you want to see actual computation of constructing `DaskDMatrix`.

    See doc for :py:obj:`xgboost.DMatrix` constructor for other parameters.  DaskDMatrix
    accepts only dask collection.

    .. note::

        `DaskDMatrix` does not repartition or move data between workers.  It's the
        caller's responsibility to balance the data.

    .. note::

        For aligning partitions with ranking query groups, use the
        :py:class:`DaskXGBRanker` and its ``allow_group_split`` option.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    client :
        Specify the dask client used for training.  Use default client returned from
        dask if it's set to None.

    NF)weightbase_marginmissingsilentfeature_namesfeature_typesgroupqidlabel_lower_boundlabel_upper_boundfeature_weightsenable_categoricalr   r   datalabelr   r   r   r   r   r   r   r   r   r   r   r   r[   c                   t        |      }|| _        |	| _        ||nt        j                  | _        || _        ||t        d      |
t        d      t        |j                        dk7  rt        d|j                         t        |t        j                  t        j                  f      s<t!        t#        t        j                  t        j                  ft%        |                  t        |t        j                  t        j                  t        j&                  t%        d       f      sKt!        t#        t        j                  t        j                  t        j&                  ft%        |                  |j                  d   | _        t        | j(                  t*              sJ t-        t.              | _        d| _        |j5                  | j6                  |||||||||
      | _        y )Nz$per-group weight is not implemented.z4group structure is not implemented, use qid instead.r!   z$Expecting 2 dimensional input, got: rE   F)	r   r   r   weightsr   r   r   r   r   )r   r   r   numpynanr   r   NotImplementedErrorr{   shape
ValueErrorrs   ddr+   daArrayr   r4   r   Series_n_colsintr   list
worker_mapis_quantilesync_map_local_data_init)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                   rc   r   zDaskDMatrix.__init__6  s   ( V$**")"5w599"4?v1%&LMM%F  tzz?aCDJJ<PQQ$rxx 89GR\\288$<d4jIJJ%",,"))T$Z!PQGR\\288RYY$GeUVVzz!}$,,,,,3>t3D!&[[  #+// ! 

rb   c                 6    | j                   j                         S r]   )r   	__await__r   s    rc   r   zDaskDMatrix.__await__r  s    zz##%%rb   )r   r   r   r   r   r   r   r   c       	   
          !K   dt         t           dt        dt         t           dt        dt        f
ddt        dt         t           ffd!d	t
        t           dt
        t         t              f!fd
}
 !|       |
|      } |
|      } |
|      } |
|      } |
|      } |
|	      }di dt
        t         t              dt        ddf fd} ||d        ||d        ||d        ||d        ||d        ||d       g }t        t                    D ]5  }i } j                         D ]  \  }}||   ||<    |j                  |       7 t        t        t        j                  |            }j                  |      }t        j                   |       d{    |D ]"  }|j"                  dk(  rJ |j"                          i | _        t'        |      D ]  \  }}|| j$                  |j(                  <     |D ci c]  }|j(                  | }}j*                  j-                  |D cg c]  }|j(                   c}       d{   }t/        t              }|j                         D ].  \  }}|t1        t3        |               j                  ||          0 || _        |	d| _        | S j                  |      j9                          d{   | _        | S 7 Ec c}w c c}w 7 7 w)z Obtain references to local data.left	left_nameright
right_namer[   c           
      H    d| d| dt        |        dt        |       d	}|S )NzPartitions between z and z are not consistent: z != z/.  Please try to repartition/rechunk your data.r{   )r   r   r   r   msgs        rc   inconsistentz1DaskDMatrix._map_local_data.<locals>.inconsistent  sA     &i[j\ B"4ykc%j\ :?@ 
 Jrb   dc                    j                  |       } t        | j                  d      rIt        | j                  j                        dkD  r'| j                  j                  d   dkD  rt        d      j                  |       S )zBreaking data into partitions.r   rE   zData should be partitioned by row. To avoid this specify the number of columns for your dask Array explicitly. e.g. chunks=(partition_size, -1]))persisthasattr
partitionsr{   r   r   
futures_of)r   r   s    rc   
to_futuresz/DaskDMatrix._map_local_data.<locals>.to_futures  sr    q!Ag.**+a/LL&&q)A- 4  $$Q''rb   metac                     | 
 |       }|S y r]   ra   )r   
meta_partsr   s     rc   flatten_metaz1DaskDMatrix._map_local_data.<locals>.flatten_meta  s    +5d+;
!!rb   r   m_partsr   Nc                 d    | -t              t        |       k(  sJ  d| |             | |<   y y )NXr   )r   r   X_partsr   partss     rc   append_metaz0DaskDMatrix._map_local_data.<locals>.append_meta  sF    "7|s7|3 \S'46 3 &d	 #rb   r   r   r   r   r   r   finished)keys)r   r	   ru   rK   r   r   ranger{   itemsappendr   mapr_   delayedcomputer   waitstatuspartition_order	enumeratekey	schedulerwho_hasr   nextiterr   r   result)"r   r   r   r   r   r   r   r   r   r   r   y_partsw_partsmargin_parts	qid_partsll_partslu_partsr   packed_partsi	part_dictr   valuedelayed_parts	fut_partspartkey_to_partitionr   r   workersr   r   r   r   s"    `                            @@@@rc   r   zDaskDMatrix._map_local_datau  s    	s)	(+	48I	KN			(/ 	(d6l 	( 	x8 	Xd6l=S 	 T"u%w'#K0 %	 12 12*0'):	&$v,!7 	&s 	&t 	& 	GW%GX&L-0Iu%H12H12
 13s7|$ 	+A+-I#kkm *
U!&q	#*	*		+ (,Cl,K'L #).."?	y))) 	:D;;*,9dkk9,	:
  " + 	/GAt-.D  *	/ 8AAtDHHdNAA4:4D4D4L4L'01t$((1 5M 5
 /
 /:$.?
#MMO 	JLCtDM*+223CC3HI	J %"#'D   *0)H)O)O)Q#QD 9 	* B1/
 $RsV   F!K>(K-)K>A	K>K0K>:K5K>K:BK>!K<"K>0K><K>worker_addrc           	          | j                   | j                  | j                  | j                  | j                  | j
                  j                  |d      | j                  dS )z\Create a dictionary of objects that can be pickled for function
        arguments.

        N)r   r   r   r   r   r   r   )r   r   r   r   r   r   getr   )r   r  s     rc   _create_fn_argszDaskDMatrix._create_fn_args  sV     "//!//#33||"&"9"9__((d;++
 	
rb   c                     | j                   S )zxGet the number of columns (features) in the DMatrix.

        Returns
        -------
        number of columns
        )r   r   s    rc   num_colzDaskDMatrix.num_col  s     ||rb   r]   )r   r   r   r   r3   r   rL   rK   floatboolr$   r%   r   r   r   r   ru   r   r	   r  r   r	  ra   rb   rc   rR   rR     s   6  
 ,0	9
 -115#'0404+/)-7;7;59#(#9
-.9
 9
 (	9
 )9
 o.9
 %9
 9
  -9
  -9
 (9
 o&9
 $O49
 $O49
  "/2!9
" !#9
$ 
%9
  9
v&9 & ,0-115)-597;7;x %x 	x
 (x /*x o.x o&x "/2x $O4x $O4x 
xt
3 
4S> 
 rb   rR   _MapRetT_Pfuncrefsr  c          
        K   t        |       } g }|D ]  }g }|D ]D  }t        |t              r!|j                  |j	                  |             4|j                  |       F dt
        dt        j                  dt        j                  dt        t           ffd} | j                  t        t        ||      |      g|d|gdd}	|j                  |	        d	t        t        t              dt        t           fd
}
t!        j"                  |      }|j%                  |
|
       d{   }	| j'                  |	      j)                          d{   }|S 7 -7 w)z.Map a function onto partitions of each worker._addressr   kwargsr[   c                     t        j                         }|j                  | k7  rt        d|j                   d|  d       |i |gS )NzInvalid worker address: z, expecting zz. This is likely caused by one of the workers died and Dask re-scheduled a different one. Resilience is not yet supported.)r   r   r   r   )r  r   r  r   r  s       rc   fnz!map_worker_partitions.<locals>.fn   s^     ++-F~~) .v~~.>l8* UU U  $)&)**rb   FT)purer  allow_other_workersresultsc                     | D ]  }||c S  y r]   ra   )r  vs     rc   first_validz*map_worker_partitions.<locals>.first_validF  s      	A}	 rb   N)r   rs   rR   r   r  ru   r  r   r  r   r  submitr   r   r   r   dbfrom_delayed	reductionr   r   )r   r  r  r  futuresr   r   refr  futr  r   r   s    `           rc   map_worker_partitionsr"    sO      FG - 	!C#{+C//56C 	!
	+ 
	+RWW 
	+		 
	+d8n 
	+: fmm72t,b1

 F $
 	s[-^Xhx&89 hx>P  //'
"Ck;7
7C>>#&--//FM 8/s$   D&E)E*&EEEEc            )       (    e Zd ZdZe	 dddddddddddddddddded   dedee   d	ee   d
ee   dee   de	dee
   deeeee   f      dee   dee   dee   dee   dee   dee   dee   de	dee   ddf& fd       Zdedeeef   f fdZ xZS )rS   zmA dask version of :py:class:`QuantileDMatrix`. See :py:class:`DaskDMatrix` for
    parameter documents.

    NF)r   r   r   r   r   r   max_binr   r   r   r   r   r   r   max_quantile_batchesr   r   r   r   r   r   r   r   r   r   r$  r   r   r   r   r   r   r   r%  r[   c                    t         |   ||||||||||||||	|       |
| _        || _        d| _        |t        |      | _        y d | _        y )N)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   T)r   r   r$  r%  r   id_ref)r   r   r   r   r   r   r   r   r   r   r$  r   r   r   r   r   r   r   r%  r   s                      rc   r   zDaskQuantileDMatrix.__init__Y  sr    . 	#//+''1 	 	
" $8!.1o2c7	4	rb   r  c                     t         |   |      }| j                  |d<   | j                  |d<   | j                  | j                  |d<   |S )Nr$  r%  r   )r   r  r$  r%  r(  )r   r  r   r   s      rc   r  z#DaskQuantileDMatrix._create_fn_args  sL    w&{3,,Y'+'@'@#$99 ))DKrb   r]   )r   r   r   r   r3   r   rL   rK   r
  r  r$   r   r	   r   r   rR   r   ru   r   r  r   r   s   @rc   rS   rS   S  s   
  
 ,0	*H -115#'049=!%%)+/)-7;7;59#(.2)*H-.*H *H (	*H )*H o.*H %*H *H  -*H  c49n 56*H #*H k"*H (*H o&*H  $O4!*H" $O4#*H$ "/2%*H& !'*H( 'sm)*H* 
+*H  *HX3 4S>  rb   rS   r   r  c                 2    | rt        di |S t        di |S Nra   )rG   rF   )r   r  s     rc   _dmatrix_from_list_of_partsr,    s!    '1&11$V$$rb   dconfigcoll_cfgc                   K   |
t               n|}d}d}t        ||      \  }}|||f}nd}	 t        j                  j	                  | j
                  j                        }|j                  d      }| j                  t        ||||j                         d{   }|j                  |      }|S # t        $ r d}Y Kw xY w7 (w)zBGet rabit context arguments from data distribution in DaskDMatrix.Nr   z/:)
CollConfigrI   r   commget_address_hostr   r   strip	Exceptionrun_on_schedulerr   tracker_timeoutget_comm_config)	r   rj   r-  r.  ro   rp   	user_addr
sched_addrr   s	            rc   _get_rabit_argsr:    s       (/z|XH!GD)'8<MGTdO		 %%66v7G7G7O7OP
%%d+

 ''	:y(:R:R C 
"
"3
'CJ  
s5   +CAB0 2&CCC0B>;C=B>>Cc                  D    t         j                  j                  dd       S )Nxgboost)default)r_   r#   r  ra   rb   rc   _get_dask_configr>    s    ;;??9d?33rb   dtrainevalsc                 ^   t        | j                  j                               }|r~|D ]y  }t        |      dk(  sJ t	        |d   t
              rt	        |d   t              sJ |d   | u rCt        |d   j                  j                               }|j                  |      }{ t        |      S )Nr!   r   rE   )	setr   r   r{   rs   rR   ru   unionr   )r?  r@  X_worker_mapr   r   s        rc   _get_workers_from_datarE    s     !!2!2!7!7!9:L 	:Aq6Q;;adK0Z!c5JJJtv~QqT__1134J'--j9L	: rb   c                    K   |j                   j                          d {   }|d   j                         }t        |       |z
  }|rt	        d|       y 7 6w)Nr  zMissing required workers: )r   identityr   rB  RuntimeError)r  r   infocurrent_workersmissing_workerss        rc   _check_workers_are_aliverL    s`      !!**,,D9o**,O'l_4O77HIJJ  -s   AA7A	train_reftrain_idevals_id
evals_name	n_threadsc                L   t        di | d|i}g }t        |      D ]  \  }}	||   |k(  r|j                  |||   f       %|	j                  dd       '|	d   |k7  rt	        d      |	d= t        di |	||d}
nt        di |	d|i}
|j                  |
||   f        ||fS )Nnthreadr   zSThe training DMatrix should be used as a reference to evaluation `QuantileDMatrix`.)rS  r   ra   )r,  r   r   r  r   )rM  rN  rO  rP  rQ  r  Xyr@  r   r   eval_Xys              rc   _get_dmatricesrV    s     
%	Dy	D)	DB')ED/ /3A;("LL"jm,-775$+5zX% *  E
1SCSPRSG1KCKKGgz!}-./  u9rb   global_configparamsnum_boost_roundobjearly_stopping_roundsverbose_eval	xgb_model	callbackscustom_metricc                 6  	
K   t        ||      }t        ||        d {    t        | t        |      ||       d {   }t	        |       dt
        dt
        t        t        t        t        f   f   dt        dt        t           dt        t           dt        dt        d	t        t           f	
fd
}t        j                  ||       4 d {    |A|D cg c]  \  }}|	 }}}|D cg c]  \  }}|	 }}}|D cg c]  }t        |       }}ng }g }g }t!        | |||t        |      ||g|g|z   d|i d {   }|cd d d       d {    S 7 A7 $7 c c}}w c c}}w c c}w 7 07  # 1 d {  7  sw Y   y xY ww)N)r-  r.  
parameters	coll_argsrN  rP  rO  rM  r  r[   c                    t        j                         }| j                         }t        ||      }	|j	                  |	|	d       i }
j	                  d|	i       t        di |5  t        j                  di 5  t        ||g||||	d\  }}t        |||
t        |      dk7  r|nd       }d d d        d d d        j                         dk7  r|
d}|S d }|S # 1 sw Y   /xY w# 1 sw Y   3xY w)N)rS  n_jobsrS  )rO  rP  rQ  r   )rX  r?  rY  evals_resultr@  rZ  r_  r[  r\  r]  r^  rN   ra   )r   r   copyrJ   ry   rQ   r#   config_contextrV  worker_trainr{   num_row)ra  rb  rN  rP  rO  rM  r  r   local_paramrQ  local_historyrT  r@  rO   r   r^  r_  r[  rW  rY  rZ  r\  r]  s                  rc   do_trainz_train_async.<locals>.do_train  s3    '') oo'!+v6	yIFG35i34 -9- 	v/D/D/U}/U 	&  "%#IB #" /*"5zQeD+&;)##G	 	4 ::<1"(+C 
 C
C	 	 	 	s$   &C2<<C&8C2&C/	+C22C;r  )rE  rL  r:  r{   r2   r   ru   r   r   r   dictr   rM   r   	MultiLockr'  r"  )r   rW  r-  rX  r?  rY  r@  rZ  r[  r\  r]  r^  r_  r.  r  rb  rl  r   n
evals_datarP  rO  r   s    `   ` ``````          rc   _train_asyncrq     s    " %VU3G
"7F
333%Gg I f%22U38_,-2 2 I	2
 s)2 2 2 
,	2 2h $$Wf5  (-.1!.J.(-.1!.J.'12!12H2JJH,vJ
 h#
 
 
 /  y 4v..2
   s   FE'FE*BFE-F F(E/4F<E5FE;!.FF FF!F"F*F-F/FFF
FFFT)r@  rZ  r[  r]  r\  r^  r_  r.  c                    t        |       }  | j                  t        ft        j                         t               dt               S )a  Train XGBoost model.

    .. versionadded:: 1.0.0

    .. note::

        Other parameters are the same as :py:func:`xgboost.train` except for
        `evals_result`, which is returned as part of function return value instead of
        argument.

    Parameters
    ----------
    client :
        Specify the dask client used for training.  Use default client returned from
        dask if it's set to None.

    coll_cfg :
        Configuration for the communicator used during training. See
        :py:class:`~xgboost.collective.Config`.

    Returns
    -------
    results: dict
        A dictionary containing trained booster and evaluation history.  `history` field
        is the same as `eval_result` from `xgboost.train`.

        .. code-block:: python

            {'booster': xgboost.Booster,
             'history': {'train': {'logloss': ['0.48253', '0.35953']},
                         'eval': {'logloss': ['0.480385', '0.357756']}}}

    )rW  r-  )r   r   rq  r#   
get_configr>  locals)r   rX  r?  rY  r@  rZ  r[  r]  r\  r^  r_  r.  s               rc   rD   rD   h  sH    b  F6;;'') " (	 rb   is_dfoutput_shapec                 &    | xr t        |      dk  S Nr!   r   )ru  rv  s     rc   _can_output_dfry    s    +S&!++rb   r   
predictioncolumnsc                    t        ||j                        rt        | dd      }t        | dd      rZddl}|j
                  dk(  r"|j                  i |t        j                        S |j                  ||t        j                  |      }|S |j
                  dk(  rt        i |t        j                  |      S t        ||t        j                  |      }|S )z0Return dataframe for prediction when applicable.indexNzcudf.core.dataframer+   r   )r{  dtype)r{  r~  r}  )	ry  r   getattrr,   cudfsizer+   r   float32)r   rz  r{  ru  r}  r  s         rc   _maybe_dataframer    s     eZ--.
 gt,4!6D!#~~b'~OOG5== ( J  !# WEMMQVWW"G5==J rb   mapped_predictrO   zdistributed.Futurer   .r   c                 X  K   t        |j                               }t        |      dk\  r%t        |t        j
                        rt        d      t        t        |t        j
                        |      r|+t        |t        j                        r|j                         }n|}t	        j                  | ||d||t        j                  j                  |            }t        |      dk(  r|j                  d d df   }|S |;t        |t        j                  t        j
                  f      r|j!                         }	n|}	t        |      dk(  rdg}
g }ndg }
t        |t        j
                        r!t#        t%        t        |      dz
              }n't%        t        |      dz
        D cg c]  }|dz   	 }}t        |      dk(  r1t#        |j&                        }t        |t"              sJ |d   f|d<   nd }t        j(                  | ||d||	||
|t*        j,                  	
      }|S c c}w w)
N   zGUse `da.Array` or `DaskDMatrix` when output has more than 2 dimensions.T)r   rE   r   r!   F)chunks	drop_axisnew_axisr~  )rt   r   r{   rs   r   r+   r   ry  r   r   to_dask_dataframemap_partitionsutils	make_metailocr   to_dask_arrayr   r   r  
map_blocksr   r  )r  rO   r   r   rv  r   r{  base_margin_dfpredictionsbase_margin_arrayr  r  r   r  s                 rc   _direct_predict_implr    s     DIIK G
<A*T2<<"@ U
 	
 jr||4lC"z+rxx'H --/  )N''##D)
 |!%**1a40KX U "z"))R\\2(
 5@4M4M4O + |!01sI.0HI$-c,&7!&; <=+0\1BQ1F+GHaAEHH|! -1,=Ffd+++%a*F1IFmm--
 3 Is   F%H*'H%3A7H*featuresinplacec                    t        |t              sJ t        j                  j	                  d      }|j                  d|      }|r)|j                         }|j                  d      dk(  rd|d<   t        |d      } | j                  |fdd	i|}t        |j                        dkD  r|j                  d   nd}	i }
t        ||j                        rt        |	      D ]  }d
|
|<   	 |j                  |
fS )z@Create a dummy test sample to infer output shape for prediction.i  rE   predict_typemarginToutput_margin)r   validate_featuresFf4)rs   r   r   randomRandomStaterandnrf  popr.   rY   r{   r   ry  r   )rO   r  ru  r  r  rngtest_samplem
test_predt	n_columnsr   r   s               rc   _infer_predict_outputr    s     h$$$
,,
"
"4
(C))Ax(K::n%1&*F?#5A FeFvFJ'*:+;+;'<q'@
  #aIDeZ--.y! 	ADG	T!!rb   modelc                   K   t        |t              r| j                  |d       d {   }|S t        |t              r | j                  |d   d       d {   }|S t        |t        j
                        r&|}|j                  }|t        urt        d|       |S t        t        t        t        t        j
                  gt        |                  7 7 w)NF)hashrO   z9Underlying type of model future should be `Booster`, got )	rs   r-   scatterrm  r   r   r   r   r4   )r   r  rO   ts       rc   _get_model_futurer  .  s      %!u599 N 
E4	 uY'7eDD N 
E;--	.LLGKA3O 
 N $0B0B CT%[QRR :Ds"   'CC/CCA>CCr  r   	pred_leafpred_contribsapprox_contribspred_interactionsr  iteration_rangestrict_shapec       	           	
()K   t        | |       d {   }t        |t        t        j                  t
        j                  f      sAt        t        t        t        j                  t
        j                  gt        |                  dt        dt        dt        dt        t           dt        dt        f	
f
d}t        |t        j                  t
        j                  f      rx| j                  | j!                  t"        ||j$                  d   t        |t
        j                        d		
             d {   \  }}t'        |||d ||       d {   S | j                  | j!                  t"        ||j)                         d	d		             d {   \  }}|j*                  }|j,                  (|j.                  )|j0                  dt        dt2        t4        t        f   dt6        j8                  f()	
fd}g }g }g }g }t;        |j<                  j?                               }|D ]l  }|j<                  |   }|jA                  |       |jA                  tC        |      |gz         |jA                  |D cg c]  }||jD                      c}       n tG        ||      D ],  \  }}| j!                  d ||g      }|jI                  |       . t;        tG        ||||            }tK        |d       }|D  !cg c]	  \  }} }!}| }}!} }}|D  !cg c]	  \  }} }!}|  }}!} }}|D  !cg c]	  \  }} }!}| }}!} }}g }"tG        ||      D ],  \  }}| j!                  ||||g      }#|"jI                  |#       . g }$| jM                  |       d {   }tO        |      D ]D  \  }%}&|$jI                  t        jP                  |"|%   |&f|dd  z   t6        jR                               F t        jT                  |$d      }'|'S 7 7 7 7 c c}w c c}}!} }w c c}}!} }w c c}}!} }w 7 w)NrO   	partitionru  r{  _r[   c                    
 t        j                  di 5  t        |
d      }| j                  |		      }t	        ||||      }|cd d d        S # 1 sw Y   y xY w)NT)r   r   r   )	r   r  r  r  r  r  r  r  r  ra   )r#   rg  r.   rY   r  )rO   r  ru  r{  r  r  predtr  rW  r  r   r  r  r  r  r  r  s          rc   r  z&_predict_async.<locals>.mapped_predictZ  s     ""3]3 	#'A
 OO+#+ /"3"3 /) $ 
E %YwFE%	 	 	s   8AA"rE   F)	r  ru  r  r  r  r  r  r  r  r  rO   r   r   rv  r   )
rO   r  ru  r  r  r  r  r  r  r  r  c                     |d   }|j                  dd       }t        j                  di 	5  t        ||d      }| j	                  |
	      }|cd d d        S # 1 sw Y   y xY w)Nr   r   T)r   r   r   r   r   )r  r  r  r  r  r  r  r  ra   )r  r#   rg  r.   rY   )rO   r  r   r   r  r  r  r   r   rW  r  r   r  r  r  r  r  r  s         rc   dispatched_predictz*_predict_async.<locals>.dispatched_predict  s    F|hh}d3""3]3 	'++#'A OO+#+ /"3"3 /) $ 
E )	 	 	s   -A%%A.c                 &    | d   j                   d   S )Nr   r   )r   )r  s    rc   <lambda>z _predict_async.<locals>.<lambda>  s    tF|'9'9!'< rb   )r  c                     | d   S rx  ra   )ps    rc   r  z _predict_async.<locals>.<lambda>  s
    ad rb   )r   )r   r~  r   axis)+r  rs   rR   r   r   r   r+   r   r4   r   r-   r	   r  r   r   r   r  r  r   r  r	  r   r   r   r   r   ru   r   ndarrayr   r   r   extendr{   r   zipr   sortedgatherr   r  r  concatenate)*r   rW  r  r   r  r   r  r  r  r  r  r  r  _boosterr  _output_shaper   rv  r  r   r  	all_parts
all_orders
all_shapesall_workersworkers_addressr  list_of_partsr  wsparts_with_orderr   orderr  farraysr   rowsr  r   r   s*    `  `````````                           @@rc   _predict_asyncr  F  s)      'vu55Hd["((BLLABbhh EtDzRSS%(15@DS	NQ	 0 $2<<01$*NNMM%A r||4+#+ /"3)  %
 
t *)&
 
 	
 #NN!\\^''+/% 	 	
 OL!  **O&&M&&MllGG 4S> emm   2 IJJK4??//12O& Q4'3}-=>O?4884OP	Q
 {I. 4MM<dQCMP! C	:z;OP.NC3CDD/$uaDID5EFF1D%%FJF2BCC.ueQ1CKCG{I. 4MM,hqcMJq F}}Z00JZ( 
4OO
4'L,<"<EMM	

 ..a0KO 6:

n P EFC 1s   QP.D/QP1Q*P4+AQ/P70C.QP:
4A0Q$P?
2Q>Q
QQ
&AQ>Q?A0Q1Q4Q7Q:QF)r   r   )	r  r   r  r  r  r  r  r  r  c       	         ~    t        |       }  | j                  t        fdt        j                         it               S )a  Run prediction with a trained booster.

    .. note::

        Using ``inplace_predict`` might be faster when some features are not needed.
        See :py:meth:`xgboost.Booster.predict` for details on various parameters.  When
        output has more than 2 dimensions (shap value, leaf with strict_shape), input
        should be ``da.Array`` or ``DaskDMatrix``.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    client:
        Specify the dask client used for training.  Use default client
        returned from dask if it's set to None.
    model:
        The trained model.  It can be a distributed.Future so user can
        pre-scatter it onto all workers.
    data:
        Input data used for prediction.  When input is a dataframe object,
        prediction output is a series.
    missing:
        Used when input data is not DaskDMatrix.  Specify the value
        considered as missing.

    Returns
    -------
    prediction: dask.array.Array/dask.dataframe.Series
        When input data is ``dask.array.Array`` or ``DaskDMatrix``, the return value is
        an array, when input data is ``dask.dataframe.DataFrame``, return value can be
        ``dask.dataframe.Series``, ``dask.dataframe.DataFrame``, depending on the output
        shape.

    rW  )r   r   r  r#   rs  rt  )r   r  r   r  r   r  r  r  r  r  r  r  s               rc   rY   rY     s5    f  F6;;~UV5F5F5HUFHUUrb   r  c        
           	K   t        |       } t        | |       d {   }
t        |t        j                  t
        j                  f      s<t        t        t        j                  t
        j                  gt        |                  |t        |t        j                  t
        j                  t
        j                  f      sKt        t        t        j                  t
        j                  t
        j                  gt        |                  dt        dt        dt        dt        t           dt        dt        f	fd}| j!                  | j#                  t$        |
|j&                  d   t        |t
        j                        d		
             d {   \  }}t)        ||
||||       d {   S 7 7 #7 	w)NrO   r  ru  r{  r   r[   c           
          t        j                  di 5  | j                  |	|
      }d d d        t        |||      }|S # 1 sw Y   xY w)N)r  r  r   r   r  r  ra   )r#   rg  rZ   r  )rO   r  ru  r{  r   rz  rW  r  r   r  r  r  s         rc   r  z._inplace_predict_async.<locals>.mapped_predict-  sm     ""3]3 		 00 /)'"3) 1 J		 &iWeL
		 		s   AArE   T)r  ru  r  r  r  r  r  )r   r  rs   r   r   r   r+   r   r4   r   r   r-   r	   r  r   r   r   r  r  r   r  )r   rW  r  r   r  r  r   r  r   r  rO   r  r   r   s    `  ```` `    rc   _inplace_predict_asyncr    s      F%fe44GdRXXr||452<< 8$t*EFFzrxxryy1( 2<< CT+EVWXX  c	
  
 * !ZZ]T2<<0%+% 	 		
 KE4 &%  S 5:s4   GF?E:GGG:G;GGGr   )r  r  r   r  r   r  c                ~    t        |       }  | j                  t        fdt        j                         it               S )a  Inplace prediction. See doc in :py:meth:`xgboost.Booster.inplace_predict` for
    details.

    .. versionadded:: 1.1.0

    Parameters
    ----------
    client:
        Specify the dask client used for training.  Use default client
        returned from dask if it's set to None.
    model:
        See :py:func:`xgboost.dask.predict` for details.
    data :
        dask collection.
    iteration_range:
        See :py:meth:`xgboost.Booster.predict` for details.
    predict_type:
        See :py:meth:`xgboost.Booster.inplace_predict` for details.
    missing:
        Value in the input data which needs to be present as a missing
        value. If None, defaults to np.nan.
    base_margin:
        See :py:obj:`xgboost.DMatrix` for details.

        .. versionadded:: 1.4.0

    strict_shape:
        See :py:meth:`xgboost.Booster.predict` for details.

        .. versionadded:: 1.4.0

    Returns
    -------
    prediction :
        When input data is ``dask.array.Array``, the return value is an array, when
        input data is ``dask.dataframe.DataFrame``, return value can be
        ``dask.dataframe.Series``, ``dask.dataframe.DataFrame``, depending on the output
        shape.

    rW  )r   r   r  r#   rs  rt  )	r   r  r   r  r  r   r  r   r  s	            rc   rZ   rZ   X  sB    j  F 6;;.4.?.?.AEKX rb   devicetree_methodr$  c                 ,   K   dt         t           dt        dt        f fd}t        d	d|i|\  }}| d{   }|||fS g }|D ]<  }	|	d   |u r|j	                  |	       |j	                  |	d    d{   |	d   f       > ||fS 7 Q7 w)
z(A switch function for async environment.r   r  r[   c                 T    t              rt        d| d|S t        ddi|S )N)r   r   r$  r   ra   )r=   rS   rR   )r   r  r   r  r$  r  s     rc   	_dispatchz2_async_wrap_evaluation_matrices.<locals>._dispatch  sC    V,& 3;A  3&3F33rb   create_dmatrixNr   rE   ra   )r   rR   r	   rA   r   )
r   r  r  r$  r  r  train_dmatrixr@  awaitedr   s
   ````      rc   _async_wrap_evaluation_matricesr    s     4x, 4 4 4 4 5XIXQWXM5''M}e##G +Q4= NN1ad
AaD)*	+
 '!! ( #s!   8BB=B<B
=BBDaskScikitLearnBasec              #   J   K   	 || _         |  d| _         y# d| _         w xY ww)z-Temporarily set the client for sklearn model.N)r   )r  r   s     rc   _set_worker_clientr    s%     
ts   # #	 #c                   T    e Zd ZdZdZdddee   deddf fdZde	d	e
d
e
dee   dee   defdZedddddde	d	e
d
e
dee   dee   defd       Z	 dde	dee   defdZ	 dde	dee   defdZdee   fdZdefdZedd       Zej0                  dd       ZdededefdZ xZS )r  z<Base class for implementing scikit-learn interface with DaskN)r.  r.  r  r[   c                2    t        |   di | || _        y r+  )r   r   r.  )r   r.  r  r   s      rc   r   zDaskScikitLearnBase.__init__  s    "6" rb   r   r  r  r   r  c          
      v  K   | j                  |      }| j                         sJ t        | j                  | j	                         |||rdnd| j
                  ||       d {   }t        |t        j                        r4|j                         }t               rt               s|j                         }|S 7 Tw)Nr  r   )r   r  r   r  r  r   r   r  )_get_iteration_range_can_use_inplace_predictrZ   r   get_boosterr   rs   r   r+   r  rg   ri   r   )r   r   r  r  r   r  predtss          rc   r  z"DaskScikitLearnBase._predict_async  s      33OD ,,...&;;""$+%2LL#/	
 	
 fbll+))+F  )9  )1	
s   A B9"B7#AB9FTr  r  r   r  r   c                X    | j                   j                  | j                  |||||      S )Nr  )r   r   r  )r   r   r  r  r   r  s         rc   rY   zDaskScikitLearnBase.predict  s8     {{'/#+   
 	
rb   c                   K   | j                  |      }t        | j                  || j                  | j                         d {   }t        | j                  | j                         |d|       d {   }|S 7 67 w)N)r   r   r   T)r  r   r  r  )r  rR   r   r   r   rY   r  )r   r   r  test_dmatrixr  s        rc   _apply_asyncz DaskScikitLearnBase._apply_async  s     
 33OD*5KKLL,,	+
 %
 KK""$+
 
 %

s$   AA?A;/A?4A=5A?=A?c                 R    | j                   j                  | j                  ||      S )N)r  )r   r   r  )r   r   r  s      rc   applyzDaskScikitLearnBase.apply  s&    
 {{ 1 11oVVrb   c                 h     dt         t           f fd} j                  |      j                         S )Nr[   c                     K    S wr]   ra   r   s   rc   r  z(DaskScikitLearnBase.__await__.<locals>._!  s     Ks   )r
   r	   _client_syncr   )r   r  s   ` rc   r   zDaskScikitLearnBase.__await__  s/    	3 	   #--//rb   c                 H    | j                   j                         }d|v r|d= |S )N_client)__dict__rf  )r   thiss     rc   __getstate__z DaskScikitLearnBase.__getstate__&  s(    }}!!#Yrb   c                 0    t        | j                        }|S )zThe dask client used in this model.  The `Client` object can not be
        serialized for transmission, so if task is launched from a worker instead of
        directly from the client process, this attribute needs to be set at that worker.

        )r   r  )r   r   s     rc   r   zDaskScikitLearnBase.client,  s     T\\*rb   c                 <    ||j                   nd| _        || _        y )NF)asynchronous_asynchronousr  )r   clts     rc   r   zDaskScikitLearnBase.client7  s     25S--erb   r  c                    | j                   }t        | dd      }	 t        j                          d}|rWt        j
                         5 }t        | |      5 } |j                  j                  |fi |d|i}|cddd       cddd       S  | j                  j                  |fi |d| j                  j                  iS # t        $ r d}Y w xY w# 1 sw Y   nxY wcddd       S # 1 sw Y   fxY w)zGet the correct client, when method is invoked inside a worker we
        should use `worker_client' instead of default client.

        Nr  FTr   )
r  r  r   r   r   worker_clientr  r   r   r   )r   r  r  r   	in_workerr   r  r   s           rc   r  z DaskScikitLearnBase._client_sync>  s     <<"4%@L"&&( 	  ..0 F+D&9 #T.dkk.. $*9E  #	# #   t{{VVT[[=U=UVV  "!	"# #
   s5   B? C'#C7	C'?CCC	C''C0r]   )r[   r   )r  r   r[   N)r   r   r   r   r  r   r0  r	   r   rL   r  rK   r&   r  r3   rY   r  r  r
   r   r   r  propertyr   setterr   r  r   r   s   @rc   r  r    s   FG;? !HZ$8 !3 !SW !
%% 	%
  % o.% ".1% 
%N  
 $"&1548

 	

  
 o.
 ".1
 

  
* 59 ".1 
	0 59WW ".1W 
	W09S> 0d    ]] W WS WS Wrb   z3Implementation of the Scikit-Learn API for XGBoost.
estimatorsc                   R   e Zd ZdZdededee   dee   deeeeef         deee      deee      d	e	e
ef   d
ee	eef      dee   defdZeddddddddddededee   dee   deeeeef         d	ee	e
ef      d
ee	eeef      deee      deee      dee   dd fd       Zy)rT   zAdummy doc string to workaround pylint, replaced by the decorator.r   ysample_weightr   eval_setsample_weight_eval_setbase_margin_eval_setverboser]  r   r[   c                0  K   | j                         }| j                  |	||
      \  }}}}
t        di d| j                  d| j                  d| j
                  d| j                  d|d|dd dd d	|d
|d|
d|d|d|dd dd d| j                  d| j                  d| j                   d {   \  }}t        | j                        rt        | j                        }nd }| j                  j                  t        d| j                  t        j                          t#               ||| j%                         ||||| j&                  | j(                  | j*                  |       d {   }|d   | _        | j/                  |d          | S 7 7 &w)Nr   r  r  r$  r   r
  r   r   r  r   r   r  r  r  
eval_groupeval_qidr   r   r   Tr   r   rW  r-  rX  r?  rY  r@  rZ  r_  r\  r[  r^  r.  r]  rO   rP   ra   )get_xgb_params_configure_fitr  r   r  r  r$  r   r   r   callable	objectiver@   r   rq  r#   rs  r>  get_num_boosting_roundsr[  r^  r.  _Booster_set_evaluation_resultr   r   r
  r  r   r  r  r  r  r]  r   rX  r  metricr?  r@  rZ  r  s                     rc   
_fit_asynczDaskXGBRegressor._fit_async]  s     $$&151D1Dv2
.vv > 
;;
;;
 ((
 LL	

 
 
 
 
 (
 $
 ,
 
 $:
 "6
 
  !
" LL#
$  $66%
& ,,'
 
, DNN#&:4>>&JCC((;; ++-$& 88:  "&"<"<nn]]! ) 
 
$  	*##GI$67]
4
s%   B4F6F7B6F-F.%FFNTr  r   r  r  r]  r  r  r   c                    t               j                         D ci c]  \  }}|dvs|| }}} | j                  | j                  fi |S c c}}w Nr   r   rt  r   r  r  r   r   r
  r  r   r  r  r]  r  r  r   kr  r   s                 rc   fitzDaskXGBRegressor.fit  sR     "(!1TAQ>S5S1TT t  9D99 U
   AA)r   r   r   r   rL   rK   r   r   r   r   r   r  r-   r9   r  r3   ru   r%  ra   rb   rc   rT   rT   W  s    LAA A
  0A o.A 8E/?*J$KLMA !)/)B CA 'x'@AA sDy!A E'8"345A "/2A 
AH   4815PT.2=AFJDH59:: :
  0: o.: 8E/?*J$KLM: %T	*+: E'3"89:: !)/)B C: 'x'@A: "/2: 
:  :rb   rT   zBImplementation of the scikit-learn API for XGBoost classification.c                       e Zd Zdededee   dee   deeeeef         deee      deee      dee	e
f   d	eeeef      d
ee   dd fdZddddddddddededee   dee   deeeeef         deee	e
f      d	eeeeef      deee      deee      d
ee   dd fdZdede
dee   dee   def
 fdZ	 	 	 ddede
dee   dee   def
dZej&                  j*                  e_        dede
de
dee   dee   def fdZ xZS )rU   r   r
  r  r   r  r  r  r  r]  r   r[   c                .  K   | j                         }| j                  |	||
      \  }}}}
t        | j                  fi d| j                  d| j
                  d| j                  d|d|dd dd d|d	|d
|
d|d|d|dd dd d| j                  d| j                  d| j                   d {   \  }}t        |t        j                        r<| j                  j                  t        j                  |             d {   | _        n6| j                  j                  |j!                                d {   | _        t#        | j                        r| j                  j%                         | _        t'        | j                        r| j                  j)                         | _        t+        j,                  | j                        | _        t/        | j                        | _        | j0                  dkD  rd|d<   | j0                  |d<   nd|d<   t3        | j4                        rt7        | j4                        }nd }| j                  j9                  t:        d| j                  t=        j>                         tA               ||| jC                         ||||| jD                  | jF                  | jH                  |       d {   }|d   | _%        t3        | j4                        s
|d   | _        | jM                  |d          | S 7 V7 7 7 Lw)Nr  r  r$  r   r
  r   r   r  r   r   r  r  r  r  r  r   r   r   r!   zmulti:softprobr  	num_classzbinary:logisticTr  rO   rP   )'r  r  r  r   r  r  r$  r   r   r   rs   r   r   r   uniqueclasses_drop_duplicatesr5   to_cupyr6   r  r   r   r{   
n_classes_r  r  r@   r   rq  r#   rs  r>  r  r[  r^  r.  r  r  r  s                     rc   r  zDaskXGBClassifier._fit_async  s     $$&151D1Dv2
.vv >KK
;;
 ((
 LL	

 
 
 
 
 (
 $
 ,
 
 $:
 "6
 
  !
" LL#
$  $66%
& ,,'
 
. a""&++"5"5biil"CCDM"&++"5"5a6G6G6I"JJDM& MM113DM$--( MM--/DMDMM2dmm,??Q"2F;"&//F;"3F;DNN#&:4>>&JCC((;; ++-$& 88:  "&"<"<nn]]! ) 
 
$  	*'#K0DN##GI$67E
0 DJ$
sK   B2L4L
5ALL6L>L?FLLALLLLNTr  c                    t               j                         D ci c]  \  }}|dvs|| }}} | j                  | j                  fi |S c c}}w r   r"  r#  s                 rc   r%  zDaskXGBClassifier.fit  sR     "(!1TAQ>S5S1TT t  9D99 Ur&  r  r  c                   K   | j                   dk(  rt        d      t        |   |d|||       d {   }t	        t        t        j                  d      t        j                        }t        t        | dd      ||      S 7 Ow)	Nzmulti:softmaxzSmulti:softmax doesn't support `predict_proba`.  Switch to `multi:softproba` insteadF)r   r  r  r   r  T)allow_unknown_chunksizesr.  r   )
r  r   r   r  r   r   r   vstackr?   r  )r   r   r  r   r  r  r2  r   s          rc   _predict_proba_asyncz&DaskXGBClassifier._predict_proba_async#  s      >>_,6  w-/#+ . 
 
  BII=ryy
 "'$a"@&&QQ
s   2BBABc                 B    | j                  | j                  ||||      S )N)r   r  r   r  )r  r3  )r   r   r  r   r  s        rc   predict_probazDaskXGBClassifier.predict_proba<  s1       %%/#+ ! 
 	
rb   r   r  c                t  K   t         	|   |||||       d {   }|r|S t        |j                        dk(  r|dkD  j	                  t
              }|S t        |j                        dk(  sJ t        |t        j                        sJ dt        dt        fd}t        j                  ||d      }|S 7 w)	Nr  rE   g      ?r!   xr[   c                 &    | j                  d      S )NrE   r  )argmax)r7  s    rc   _argmaxz1DaskXGBClassifier._predict_async.<locals>._argmaxh  s    xxQx''rb   )r  )r   r  r{   r   astyper   rs   r   r   r	   r  )
r   r   r  r  r   r  
pred_probspredsr:  r   s
            rc   r  z DaskXGBClassifier._predict_asyncM  s      !71'/#+ 2 
 

 z A%#%--c2E  z''(A---j"((333(3 (3 ( MM':CE-
s   B8B6BB8)TNN)r   r   r   rL   rK   r   r   r   r   r   r  r-   r9   r  ru   r%  r&   r3  r	   r5  r7   r   r  r   r   s   @rc   rU   rU     s   UU U
  0U o.U 8E/?*J$KLMU !)/)B CU 'x'@AU sDy!U E'8"345U "/2U 
Uz 4815PT.2=AFJDH59:: :
  0: o.: 8E/?*J$KLM: %T	*+: E'3"89:: !)/)B C: 'x'@A: "/2: 
:"RR  R o.	R
 ".1R 
R8 #'1548

  
 o.	

 ".1
 

 *77??M 	
   o. ".1 
 rb   rU   zZImplementation of the Scikit-Learn API for XGBoost Ranking.

    .. versionadded:: 1.4.0

aq  
    allow_group_split :

        .. versionadded:: 3.0.0

        Whether a query group can be split among multiple workers. When set to `False`,
        inputs must be Dask dataframes or series. If you have many small query groups,
        this can significantly increase the fragmentation of the data, and the internal
        DMatrix construction can take longer.

zf
        .. note::

            For the dask implementation, group is not supported, use qid instead.
)extra_parametersend_notec                    2    e Zd Zedddddededee   ded	df
 fd
       Z	d	e
e   f fdZdededee   dee   dee   deeeeef         deee      deee      deee      deeef   deeeef      dee   d	d fdZeddddddddddddddededee   dee   dee   dee   deeeeef         deee      deee      deeeef      deeeeef      deee      deee      dee   d	d fd       Zej*                  j.                  e_         xZS )rV   zrank:pairwiseFN)r  allow_group_splitr.  r  rA  r.  r  r[   c                d    t        |      rt        d      || _        t        |   d||d| y )Nz5Custom objective function not supported by XGBRanker.)r  r.  ra   )r  r   rA  r   r   )r   r  rA  r.  r  r   s        rc   r   zDaskXGBRanker.__init__  s8     ITUU!2J9xJ6Jrb   c                 F    t         |          }|j                  d       |S )NrA  )r   _wrapper_paramsaddr   rX  r   s     rc   rD  zDaskXGBRanker._wrapper_params  s"    (*

&'rb   r   r
  r   r  r   r  r  r  r  r  r]  r   c       
           K   | j                         }| j                  |||      \  }}}}t        | j                  fi d| j                  d| j
                  d| j                  d|d|dd d|d|d	|d
|d|d|d|dd d|	d| j                  d| j                  d| j                   d {   \  }}| j                  j                  t        d| j                  t        j                         t               ||| j                         |d ||
| j                   | j"                  || j$                         d {   }|d   | _        |d   | _        | S 7 7 w)Nr  r  r$  r   r
  r   r   r  r   r   r  r  r  r  r  r   r   r   T)r   r   rW  r-  rX  r?  rY  r@  rZ  r_  r\  r[  r^  r]  r.  rO   rP   )r  r  r  r   r  r  r$  r   r   r   r   rq  r#   rs  r>  r  r[  r^  r.  r  evals_result_)r   r   r
  r   r  r   r  r  r  r  r  r]  r   rX  r  r  r?  r@  r  s                      rc   r  zDaskXGBRanker._fit_async  s      $$&151D1Dv2
.vv >KK
;;
 ((
 LL	

 
 
 
 
 (
 $
 ,
 
 $:
 "6
 
  !
" LL#
$  $66%
& ,,'
 
* ((;; ++-$& 88:  "&"<"<nn]]! ) 
 
$  	*$Y/S
*
s%   B2E4E5B	E>E?EE)r   r   r  r   r  r  r  r  r]  r  r  r   r   r  c                2   d}||t        |      |t        d      dt        dt        t        j                     fd}dt
        t           dt        dt        t
        t        j                        fd}| j                  so ||      r$ ||d      r ||d	      r ||d
      r	 ||d      sJ ||J t        |      }t        | j                  |||||      \  }}}}}|g }g }g }g }|	sJ t        |      D ]  \  }\  }}|r||   nd }|r||   nd } ||      sJ |	sJ |	|   }|	r$ ||d      r ||d	      r ||d
      r	 ||d      sJ ||J t        |      |k7  r!t        | j                  |||||      \  }}}}}n|||||f\  }}}}}|j                  ||f       |j                  |       ||j                  |       ||j                  |        |}|}	|r|nd }|r|nd }| j                  | j                  |||||||	|
||||      S )Nz=Use the `qid` instead of the `group` with the dask interface.z`qid` is required for ranking.r   r[   c                 N    t        | t        j                        st        d      y)NzJWhen `allow_group_split` is set to False, X is required to be a dataframe.T)rs   r   r+   r   )r   s    rc   check_dfz#DaskXGBRanker.fit.<locals>.check_df  s'    a.$  rb   r   r   c                 Z    t        | t        j                        s| t        d| d      y)Nz*When `allow_group_split` is set to False, z is required to be a series.T)rs   r   r   r   )r   r   s     rc   	check_serz$DaskXGBRanker.fit.<locals>.check_ser  s:     c299-#/@ G! !  rb   r
  r  r   )r
  r  r   )r   r
  r   r  r   r  r  r  r]  r  r  r   )r   rK   r   r   r+   r   ru   r   rA  r'  rH   r  r   r   r  r  )r   r   r
  r   r   r  r   r  r  r  r  r]  r  r  r   r   rK  rM  X_idnew_eval_setnew_eval_qidnew_sample_weight_eval_setnew_base_margin_eval_setr   Xeyewebeqes                                rc   r%  zDaskXGBRanker.fit  s   & N*"4S/!;=>>	 	Ibll,C 		/*	25	x		*+	 %%c5)a%m_=k=9: ?q}44a5D4B+'51AsA}k #!!-/*+-(x#,X#6 <KAxB6L/2RVB4H-a0dB#B<'<#O8!!B %b%0%b#.%b/:%b-89 >bn<<"v~-; KKRR.*BB ./Q{-R*BB ''R1 ''+~299"=~077;7<: ('2L.RV ' 1I,d %   OO'##9!5+ ! 
 	
rb   )r   r   r   r3   ru   r  r   r0  r	   r   r   rD  rL   rK   r   r   r   r   r9   r-   r  r%  r:   r   r   r   s   @rc   rV   rV   o  s   2   )"')-K K  	K
 :&K K 
K  KS 
== =
 o&=  0= o.= 8E/?*J$KLM= !)/)B C= 'x'@A= 8O45= sDy!= E(G"345= "/2= 
=@   ,0)-3715PT:>8<.3=AFJDH59!w
w
 w

 (w
 o&w
  0w
 o.w
 8E/?*J$KLMw
 Xo67w
 8O45w
 %T	*+w
 E(C"89:w
 !)/)B Cw
 'x'@Aw
  "/2!w
" 
#w
  w
v --''CKKrb   rV   zjImplementation of the Scikit-Learn API for XGBoost Random Forest Regressor.

    .. versionadded:: 1.4.0

r  zI
    n_estimators : int
        Number of trees in random forest to fit.
)r>  c                   F    e Zd Zedddddddee   dee   dee   d	ee   d
ee   deddf fd       Zde	e
ef   f fdZdefdZddddddddddededee   dee   deeeeef         deeeef      deeee
ef      deee      deee      dee   dd f fdZ xZS )rW   rE   皙?h㈵>Nlearning_rate	subsamplecolsample_bynode
reg_lambdar.  r\  r]  r^  r_  r.  r  r[   c          	      0    t        |   d|||||d| y Nr[  ra   r   r   r   r\  r]  r^  r_  r.  r  r   s          rc   r   zDaskXGBRFRegressor.__init__g  0     	 	
'-!	
 	
rb   c                 B    t         |          }| j                  |d<   |S Nnum_parallel_treer   r  n_estimatorsrF  s     rc   r  z!DaskXGBRFRegressor.get_xgb_params{  &    ')&*&7&7"#rb   c                      yNrE   ra   r   s    rc   r  z*DaskXGBRFRegressor.get_num_boosting_rounds      rb   Tr  r   r
  r  r   r  r  r]  r  r  r   c                    t               j                         D ci c]  \  }}|dvs|| }}}t        | j                  | j                         t        |   di | | S c c}}w Nr!  ra   rt  r   r>   r[  r^  r   r%  r   r   r
  r  r   r  r  r]  r  r  r   r$  r  r   r   s                 rc   r%  zDaskXGBRFRegressor.fit  _     "(!1TAQ>S5S1TT455t~~Fd U
   A#A#r   r   r   r3   r   r
  r0  r	   r   r   ru   r  r   r  rL   rK   r   r   r   r  r-   r9   r%  r   r   s   @rc   rW   rW   Z  s      *+%(,/&*)-
  
 E?	

 #5/
 UO
 :&
 
 

  
&S#X 
  4815PT.2=AFJDH59 
  0 o. 8E/?*J$KLM %T	*+ E'3"89: !)/)B C 'x'@A "/2 
 rb   rW   zkImplementation of the Scikit-Learn API for XGBoost Random Forest Classifier.

    .. versionadded:: 1.4.0

c                   F    e Zd Zedddddddee   dee   dee   d	ee   d
ee   deddf fd       Zde	e
ef   f fdZdefdZddddddddddededee   dee   deeeeef         deeeef      deeee
ef      deee      deee      dee   dd f fdZ xZS )rX   rE   rY  rZ  Nr[  r\  r]  r^  r_  r.  r  r[   c          	      0    t        |   d|||||d| y ra  rb  rc  s          rc   r   zDaskXGBRFClassifier.__init__  rd  rb   c                 B    t         |          }| j                  |d<   |S rf  rh  rF  s     rc   r  z"DaskXGBRFClassifier.get_xgb_params  rj  rb   c                      yrl  ra   r   s    rc   r  z+DaskXGBRFClassifier.get_num_boosting_rounds  rm  rb   Tr  r   r
  r  r   r  r  r]  r  r  r   c                    t               j                         D ci c]  \  }}|dvs|| }}}t        | j                  | j                         t        |   di | | S c c}}w ro  rp  rq  s                 rc   r%  zDaskXGBRFClassifier.fit  rr  rs  rt  r   s   @rc   rX   rX     s      *+%(,/&*)-
  
 E?	

 #5/
 UO
 :&
 
 

  
&S#X 
  4815PT.2=AFJDH59 
  0 o. 8E/?*J$KLM %T	*+ E'3"89: !)/)B C 'x'@A "/2 
 rb   rX   )NN)
   )r   loggingcollectionsr   
contextlibr   	functoolsr   r   r   	threadingr   typingr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r_   r   r   r   r   r   r  r   r   dask.delayedr   r   packaging.versionr   r    r^    r"   r#   _typingr$   r%   r&   callbackr'   r(   r0  r)   CollArgsr*   r   compatr+   r,   corer-   r.   r/   r0   r1   r2   r3   r4   r   r5   r6   sklearnr7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   trackerrC   trainingrD   rh  rF   rG   rH   r  rI   rJ   r   r   rK   __annotations__rL   rM   __all__	getLoggerr|   rd   r  rg   ri   r   ru   r~   r   rQ   r   rR   r  r  r"  rS   r,  r:  r>  rE  rL  rm  rV  rq  ry  r  r  r  r  r
  r  r   rY   r  rZ   r  r  r  rT   rU   rV   rW   rX   ra   rb   rc   <module>r     s  2f  # % 4 4     (           % 4 ! @ @ ' - * 1 /	 	 	 0    # , K K 7"288R\\299#DE E"((BLL01	 1F 
		+	, +w + + 9 9 9 8 8 8--hsmXeCHo%>>?@- c]- 	-`SM U38_- c]	
 U*88 U!56 ;O "l l^ :t_D)*D
2x<
 D D c]	D
 DN9+ 9x%T %S %W % )-%)	% %% d38n%% z"	%
 
#uS#X
%P4(4S>2 4 (%S8H2I)J K	#YK#YK 4K	K  sm	
   7Dw|,--.>e e S>e d38n%	e
 cNe e e HU;#3456e 
)	e $C=e T	"e  e !123e F#e z"e  l!eP 
 	6 :>#+/#'%)6:&*%)6 6cN6 6 	6 HU;#34566 
)	6 $C=6  6 T	"6 !1236 F#6 z"6 	6 6r,$ ,e , ,
)-c;?:RR "R 	R
 /*R S/R sCx.R Rj"" #",0";?"KN"
5c?DcN*+", ).w>R/R)S0W WS>W $ 445W 	W W W W W W W W $W W Wt   YY!#"&,3V)*3Vw(<<=3V V#
$3V
 3V 3V 3V 3V 3V 3V 3V $3V 3V 	3V 3Vl> > S>> $ 445	>
 > $> > > > /*> > >B  '-YY"-1:)*:w(<<=: :
 $: : : : /*: : 	: :z")*"SM" #" c]	"
 " ;eK,<&=!>??@":  *> UW( UWp =g?VW:')< W:W:t H7t)+> t	tn 
 7
%0P(N$7 P(10P(f 
 k/) //d 
 k/+ //rb   