
import copy

import torch
from torch.nn import CrossEntropyLoss

from peft.utils.integrations import gather_params_ctx


class CPTEmbedding(torch.nn.Module):
    """
    CPTEmbedding is a custom embedding layer designed for Context-aware Prompt Tuning (CPT) in PEFT. It initializes
    embeddings, applies prompt-specific projections, and computes loss using label masks.
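
    Example (illustrative only; ``config`` stands for a CPT prompt-tuning config and
    ``base_embeddings`` for the base model's input embedding module)::

        cpt = CPTEmbedding(config, base_embeddings)
        prompt = cpt(torch.arange(config.num_virtual_tokens).unsqueeze(0))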
    """

    def __init__(self, config, word_embeddings):
        """
        Initializes the CPTEmbedding module.

        Args:
            config (Namespace):
                Configuration object containing model hyperparameters and CPT-specific settings.
            word_embeddings (torch.nn.Embedding):
                The base word embedding layer used to initialize CPT embeddings.
        """
        super().__init__()
        self.config = copy.deepcopy(config)
        num_virtual_tokens = config.num_virtual_tokens

        # Initialize embeddings with virtual token dimensions
        self.embedding = torch.nn.Embedding(num_virtual_tokens, config.token_dim)

        # Initialize embeddings from the tokenized prompt, unless in inference mode
        if not config.inference_mode:
            assert config.num_virtual_tokens == len(config.cpt_token_ids)

            init_token_ids = torch.LongTensor(config.cpt_token_ids).to(word_embeddings.weight.device)
            with gather_params_ctx(word_embeddings.parameters()):
                word_embedding_weights = word_embeddings(init_token_ids).detach().clone()
            word_embedding_weights = word_embedding_weights.to(torch.float32)
            self.embedding.weight = torch.nn.Parameter(word_embedding_weights)

        # Initialize delta embedding with zero weights
        self.delta_embedding = torch.nn.Embedding(num_virtual_tokens, config.token_dim)
        self.delta_embedding.weight.data = torch.zeros_like(self.delta_embedding.weight).to(torch.float32)

        # Apply a hook that masks gradients of non-trainable token positions
        self.set_updated_tokens()
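
    # forward() below decomposes the effective prompt P into a frozen part and a
    # trainable residual:
    #
    #     P[i] = embedding.weight[i] + delta_embedding.weight[i]
    #
    # where the delta rows are re-projected onto their epsilon-balls on every
    # call, so the learned prompt cannot drift far from its text-based
    # initialization.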
    def forward(self, indices):
        """
        Computes the prompt embeddings and applies delta adjustments.

        Args:
            indices (torch.Tensor):
                Indices of the tokens to be embedded.

        Returns:
            torch.Tensor:
                Sum of prompt embeddings and delta embeddings.
        """
        with torch.no_grad():
            prompt_embeddings = self.embedding(indices)

        self.delta_embedding.weight.data = self.get_projection()  # Apply epsilon-based projection

        delta_prompt_embeddings = self.delta_embedding(indices)

        return prompt_embeddings + delta_prompt_embeddings
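
    # The CPT token type mask assigns every prompt position an integer whose
    # remainder modulo 4 encodes its role; this convention, as read off the masks
    # in set_updated_tokens(), get_epsilon() and calculate_loss():
    #   remainder 1 -> input-template token
    #   remainder 2 -> input token
    #   remainder 3 -> output-template token
    #   remainder 0 (with value > 0) -> output/label token
    # A value of 0 falls outside every mask and marks a non-CPT position.
    # For instance (illustrative values only):
    #
    #     cpt_tokens_type_mask = [1, 1, 2, 2, 3, 4]  # template, input, label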
        N)r   no_gradr   get_projectionr   r   r!   )r#   indicesprompt_embeddingsdelta_prompt_embeddingss       r(   forwardzCPTEmbedding.forward?   sm     ]]_ 	8 $w 7	8 ,0+>+>+@##("&"6"6w"? #:::	8 	8s   A..A7c                    t        j                  | j                  j                        j	                         }t        j
                  |d      dk(  }t        j
                  |d      dk(  }t        j
                  |d      dk(  }||z  |z  j                  dd      fd}| j                  j                  j                  |       y)zq
        Sets up a backward hook to selectively update token gradients based on the CPT token type mask.
                    c                 D    | j                  | j                        z  } | S )N)r   r   )gradmasks    r(   backward_hookz6CPTEmbedding.set_updated_tokens.<locals>.backward_hook_   s    $''$++..DK    N)
r   Tensorr   cpt_tokens_type_masklong	remainderviewr   r   register_hook)r#   tensor_ICL_maskmask_input_template
mask_inputmask_output_templater9   r8   s         @r(   r"   zCPTEmbedding.set_updated_tokensT   s      ,,t{{'G'GHMMO#oooqAQF___a8A=
$BaG"Z/2FFyyQ	 	##11-@r:   c                 4   | j                   j                  }d}| j                   j                  t        j                  t        j
                  | j                   j                  dz  g            z  }| j                   j                  t        j                  t        j
                  | j                   j                  dz  g            z  }t        j                  t        j
                  |            j                  t        j                        |z  }t        j
                  |      j                         }|||dkD  t        j                  |d      dk(  z  <   |||dkD  t        j                  |d      dk(  z  <   |||dkD  t        j                  |d      dk(  z  <   |S )Ng|=i   r   r1   r2   r4   r3   )r   r<   opt_projection_format_epsilonr   sqrtr;   r   opt_projection_epsilon	ones_liker   r   r=   r>   )r#   r<   	MIN_VALUEnormalized_format_epsnormalized_input_epsepsilons         r(   get_epsilonzCPTEmbedding.get_epsilone   s^   #{{??	 !% I IEJJLL$++//$678M
 !
  ${{AAEJJLL$++//$678E
  
 //%,,/C"DEHHWZcc$||,@AFFH`u%)eoo>RTU.VZ[.[\]`u%)eoo>RTU.VZ[.[\]`t%)eoo>RTU.VZ[.[\]r:   c           	      L   t        j                         5  | j                  j                  j	                         j                  | j                  j                  j                        }t        j                  |dd      }|dkD  }t        j                  |      rv| j                         j                  | j                  j                  j                        }||xx   ||   ||   j                  ||         z  j                  dd      z  cc<   |cddd       S # 1 sw Y   yxY w)za
    def get_projection(self):
        """
        Applies epsilon-based projection to the delta embeddings to control their norm.
        """
        # Apply projection to control the norm of each delta embedding row
        with torch.no_grad():
            new_embeddings_weights = self.delta_embedding.weight.clone().to(self.delta_embedding.weight.device)
            token_norm = torch.norm(new_embeddings_weights, p=2, dim=1)
            projection_mask = token_norm > 0
            if torch.any(projection_mask):
                epsilon = self.get_epsilon().to(self.delta_embedding.weight.device)
                new_embeddings_weights[projection_mask] *= (
                    epsilon[projection_mask] / (token_norm[projection_mask].clamp(min=epsilon[projection_mask]))
                ).view(-1, 1)
            return new_embeddings_weights

    @staticmethod
    def calculate_loss(base_model_output, labels, cpt_type_mask, config):
        """
        Computes the loss for CPT models with optional exponential decay.

        Args:
            base_model_output (ModelOutput):
                Output from the base model containing logits.
            labels (torch.Tensor):
                Ground-truth labels for the input tokens.
            cpt_type_mask (torch.Tensor):
                Token type mask used for filtering valid loss terms.
            config (Namespace):
                Configuration object containing loss-related hyperparameters.

        Returns:
            ModelOutput:
                The base model output with computed loss.
        """
        device = base_model_output.logits.device

        lm_logits = base_model_output.logits
        labels = labels.to(device)
        # Shift logits and labels so that tokens before position n predict token n
        shift_logits = lm_logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()
        shift_cpt_type_mask = cpt_type_mask[..., 1:].contiguous()

        shift_labels_bool = (shift_labels.clone().detach() != -100).bool()
        batch_size, seq_length, vocab_size = shift_logits.shape

        # Compute per-token cross-entropy loss
        loss_fct = CrossEntropyLoss(reduction="none", ignore_index=-100)
        loss = loss_fct(
            shift_logits.view(batch_size * seq_length, vocab_size), shift_labels.view(batch_size * seq_length)
        )
        loss = loss.view(batch_size, seq_length)

        shift_labels_weights = shift_labels_bool.clone().detach().float()
        for i in range(batch_size):
            idx_labels = (shift_cpt_type_mask[i] > 0) & (shift_cpt_type_mask[i] % 4 == 0)
            labels_ids = shift_cpt_type_mask[i][idx_labels].unique()

            # The last in-context example keeps weight 1; earlier ones decay exponentially
            exponential_decay = torch.ones_like(shift_cpt_type_mask[i]).to(device=device).float()
            decay_value = 1
            for label_mask_idx in torch.flip(labels_ids, [0]):
                exponential_decay[shift_cpt_type_mask[i] == label_mask_idx] = decay_value
                decay_value *= config.opt_loss_decay_factor
            if config.opt_weighted_loss_type == "decay":
                shift_labels_weights[i] *= exponential_decay

        # Weighted mean over the valid (non-ignored) label positions
        loss = (loss[shift_labels_bool] * shift_labels_weights[shift_labels_bool]).mean()
        base_model_output.loss = loss

        return base_model_output
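

# Illustrative smoke test only, not part of the PEFT API: it exercises the module
# in isolation, with a plain SimpleNamespace standing in for the real CPT config
# object. The attribute names mirror exactly the ones this module reads; the
# values are made up.
if __name__ == "__main__":
    from types import SimpleNamespace

    cfg = SimpleNamespace(
        num_virtual_tokens=4,
        token_dim=8,
        inference_mode=False,
        cpt_token_ids=[5, 6, 7, 8],          # token ids used to seed the prompt
        cpt_tokens_type_mask=[1, 2, 3, 4],   # one role per virtual token
        opt_projection_epsilon=0.1,
        opt_projection_format_epsilon=0.1,
    )
    base_embeddings = torch.nn.Embedding(32, cfg.token_dim)

    cpt = CPTEmbedding(cfg, base_embeddings)
    out = cpt(torch.arange(cfg.num_virtual_tokens).unsqueeze(0))
    # At initialization the delta is zero, so `out` equals the frozen word
    # embeddings of cpt_token_ids.
    print(out.shape)  # expected: torch.Size([1, 4, 8])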