
    bi*                        d dl mZ d dlZd dlmZ d dlZd dlmZ d dl	m
Z
 d dlm
c mZ d dlmZ d dlmZmZ d dlmZmZ  G d de
j,                  e      Zy)	    )annotationsN)Optional)
BufferDict)BaseTunerLayercheck_adapters_to_merge)check_deepspeed_zero3_enabledgather_params_ctxc                       e Zd ZdZdZ	 d	 	 	 	 	 	 	 	 	 d fdZed        ZddZd Z	d Z
dddZdd	Zd
 ZddZddZ xZS )TrainableTokensLayer)trainable_tokens_delta)token_indicestrainable_tokens_originalc                   t         |           || _        || _        || _        |r|gng | _        | j                  s9t        j                  i       | _	        t        i       | _        i | _        g | _        y | j                  j                  | _	        | j                  j                  | _        | j                  j                  | _        g | _        y N)super__init__
base_layer_active_adapterkwargs_tied_adaptertied_adapternnParameterDictr   r   r   r   merged_adapters)selfr   adapter_namer   r   r   	__class__s         ]/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/peft/tuners/trainable_tokens/layer.pyr   zTrainableTokensLayer.__init__%   s     	$+ 0<l^   *,*:*:2*>D'-7^D*!#D  " +/*;*;*R*RD'-1->->-X-XD*!%!2!2!@!@D  "    c                :    | j                   r| j                   d   S y )Nr   )r   )r   s    r   r   z!TrainableTokensLayer.tied_adapterH   s    %%a((r   c                   d}t        j                  dt         j                  j                               }t	        |gd      5  t        j                         |k(  r||   j                         }n-t        j                  t        |      |f|j                  |      }ddd       t        j                  |       |S # 1 sw Y   "xY w)zDeepSpeed zero3 specific code to initialize trainable tokens.

        Ensures that only the necessary weights are collected to a single rank, initialized, and then shared with all
        ranks.
        r   cudaN)modifier_rankdtypedevice)src)torchr&   r"   current_devicer	   distget_rankcloneemptylenr%   	broadcast)r   weightrows	embed_dimsrc_rankr&   token_weightss          r   _collect_token_weightsz+TrainableTokensLayer._collect_token_weightsN   s     fejj&?&?&ABxt< 		}}(* &t 2 2 4 !&Y	* ,,!!		 	}(3		 		s   AB==Cc                   |j                  dd       ry |d   | j                  |<   |j                  dd      }| j                         j                  }| j                         j                  }|rHt               r!| j                  || j                  |   |      }na| j                  | j                  |      }nDt        j                  t        | j                  |         |f|j                  |j                        }t        j                  |j                         d      | j                  |<   |j                         | j                   |<   | j#                  |       y )Nr   r   init_weightsTr$   )requires_grad)getr   get_base_layerr0   embedding_dimr   r5   r(   randnr.   r%   r&   r   	Parameterr,   r   r   %_move_adapter_to_device_of_base_layer)r   r   r   r7   r0   r2   valuess          r   update_layerz!TrainableTokensLayer.update_layerg   s#   ::nd++1/+B<(zz.$7 $$&--'')77	,.44VT=O=OP\=]_hiT%7%7%EF [[T''56	Bll}}F 57LL_c4d##L17=||~&&|422<@r   c                   t        |      dk  ryt               }t        || j                  z         D ]T  }t        | j                  |         }t        |j	                  |            rt        d| d      |j                  |       V y)a  Raises an error if the token indices of the given adapter names are overlapping.
        This is currently not supported and can lead to undefined behavior of the model if no specific merging between
        the overlapping indices' values is applied.
           NzToken indices of adapter zy are already defined and would result in undefined merging behavior. Only disjunct token indices are currently supported.)r.   setr   r   intersection
ValueErrorupdate)r   adapter_namesindicesr   	index_sets        r   _check_overlapping_tokensz.TrainableTokensLayer._check_overlapping_tokens   s    
 }"%  0D0D DE 	&LD..|<=I7''	23 /~ >g g  NN9%	&r   c                B   t        | |      }|sy | j                  |       | j                  j                  j                  }|D ]  }t        j                  | j                  |         j                  |j                        }| j                  |   j                  |      }|j                  d||      }|sst        j                  |      j                         rt        d| d       || j                  j                  _        | j                  j!                  |       y )Nr   dimindexsourcez1NaNs detected in the merged weights. The adapter z seems to be broken)r   rJ   r   r0   datar(   tensorr   tor&   r   
index_copyisfiniteallrE   r   extend)r   
safe_mergerG   mergedr   rN   deltass          r   mergezTrainableTokensLayer.merge   s    /mD&&}5'',,) 	xLLL!3!3L!ABEEfmmTE00>AA&IF&&1E&&IF%.."8"<"<"> #TUaTbbu!vww	x '-###M2r   c                F   | j                   st        j                  d       y t        | j                        dkD  r| j                  j                         }t        j                  | j                  |         j                  | j                  j                  j                        }| j                  |   j                  | j                  j                        }| j                  j                  j                  j                  d||       t        | j                        dkD  ry y )Nz Already unmerged. Nothing to do.r   rL   )rX   warningswarnr.   r   popr(   rQ   r   rR   r   r0   r&   r   rP   index_copy_)r   r   rN   	originalss       r   unmergezTrainableTokensLayer.unmerge   s    {{MM<=$&&'!+//335LLL!3!3L!ABEEdooF\F\FcFcdE66|DGGH^H^_IOO""''33y3Y $&&'!+r   c                   | j                   j                  }|D ]o  }t        j                  | j                  |         j                  |j                        }| j                  |   j                  |      }|j                  d||      }q |S )Nr   rL   )	r   r0   r(   rQ   r   rR   r&   r   rS   )r   active_adaptersWr   rN   rY   s         r   get_merged_weightsz'TrainableTokensLayer.get_merged_weights   s    OO""+ 	@LLL!3!3L!ABEEahhOE00>AA!DF%?A	@
 r   c           	        | j                   s|s4| j                  r| j                           | j                  |g|i |}|S | j                  r | j                  |g|i |}|S | j	                  |       | j                  |      }t        | j                  t        j                  j                        rt        j                  ||| j                  j                  | j                  j                  | j                  j                  | j                  j                  | j                  j                         }|S t        | j                  t        j                  j"                        rt        j$                  ||      }|S t'        d      )N)inputr0   padding_idxmax_norm	norm_typescale_grad_by_freqsparse)rg   r0   zZTrainableTokensLayer wraps an unknown layer type, maybe you are targeting the wrong layer?)disable_adaptersrX   ra   r   rJ   re   
isinstancer(   r   	EmbeddingF	embeddingrh   ri   rj   rk   rl   LinearlinearrE   )r   xrc   argsr   resultrd   s          r   forward_adaptersz%TrainableTokensLayer.forward_adapters   sF     {{$T__Q888FF E [[$T__Q888FB ? **?;''8A $//588+=+=> $ ; ;!__55"oo77'+'I'I??11(  DOOUXX__= 	 !p r   c                D     | j                   || j                  g|i |S r   )rw   rc   )r   rt   ru   r   s       r   forwardzTrainableTokensLayer.forward   s'    $t$$Q(<(<NtNvNNr   r   )
r   z	nn.Moduler   strr   z	list[int]r   zOptional[TrainableTokensLayer]returnNone)r0   torch.Tensorr1   r}   r2   intr{   r}   )FN)rW   boolrG   zOptional[list[str]]r{   r|   )r{   r|   )rt   r}   r{   r}   )__name__
__module____qualname__adapter_layer_namesother_param_namesr   propertyr   r5   r@   rJ   rZ   ra   re   rw   ry   __classcell__)r   s   @r   r   r      s    5 G 8<!"!" !" !	!"
 5!" 
!"F  
2"AH&(3,
Z'ROr   r   )
__future__r   r\   typingr   r(   torch.distributeddistributedr*   torch.nnr   torch.nn.functional
functionalrp   peft.tuners._buffer_dictr   peft.tuners.tuners_utilsr   r   peft.utils.integrationsr   r	   Moduler    r   r   <module>r      s>    #         / L TWO299n WOr   