
    biYG                         d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZm	Z	 d dl
mZ  e       rd dlmZ d dlmZ  G d d	e      Z G d
 d      Z G d d      Zy)    N)Optional)extract_model_from_parallel)StoppingCriteriaStoppingCriteriaList)is_rich_available)print)Textc                       e Zd ZdZd Zd Zy)StringStoppingCriteriazUCustom `StoppingCriteria` which checks if all generations in the batch are completed.c                 .    || _         || _        d| _        y )NT)stop_strings	tokenizer
first_call)selfr   r   s      [/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/environment/base_environment.py__init__zStringStoppingCriteria.__init__!   s    ("    c                 
  	 | j                   rIt        |j                  d         D cg c]  }d c}| _        |j                  d   dz
  | _        d| _         | j
                  j                  |dd| j                  df         }g }t        |      D ]N  \  }	t        	fd| j                  D              }|j                  |       |r8| j                  |xx   dz  cc<   P t        |      rd| _         t        |      S c c}w )zHReturns true if all generated sequences contain any of the stop strings.r      FNc              3   &   K   | ]  }|v  
 y wN ).0stop_stringdecoded_generations     r   	<genexpr>z2StringStoppingCriteria.__call__.<locals>.<genexpr>0   s     #k+K3E$E#ks   T)r   rangeshapegenerated_tokensstart_lengthr   batch_decode	enumerateanyr   appendall)
r   	input_idsscoreskwargs_decoded_generationsdoneisequence_completer   s
            @r   __call__zStringStoppingCriteria.__call__&   s    ??05iooa6H0I$J1Q$JD! ) 3a 7D#DO"nn99)AtGXGXGZDZ:[\%./B%C 	.!A! ##kY]YjYj#k kKK)*$%%a(A-(		. t9"DO4y %Ks   	D N)__name__
__module____qualname____doc__r   r/   r   r   r   r   r      s    _
r   r   c                   T    e Zd ZdZddZddZddZed        Zd Z	ddZ
ddZd	 Zy
)TextHistoryzrThe TextHistory class keeps track of the history of an interaction between the language model and the environment.c                    g | _         g | _        g | _        t        j                  g t        j
                        j                  |j                        | _        d| _	        t        j                  g t        j
                        j                  |j                        | _
        d| _        d| _        d| _        d| _        d| _        d| _        d| _        | j%                  |||	       y
)a  
        Initialize TextHistory.

        Args:
            text (`str`): The text of the first segment.
            tokens (`torch.LongTensor`): The tokens of the first segment.
            system (`bool`, *optional*): Whether the first segment is a system or user segment.
        )dtype Fg        zblack on grey85zblack on cyan3zblack on deep_sky_blue1zblack on plum1systemN)system_spans
text_spanstoken_spanstorchtensorlongtodevicetoken_maskstexttokens	completed	truncatedrewardprompt_colorsystem_colormodel_colorreward_colorappend_segment)r   rD   rE   r:   s       r   r   zTextHistory.__init__>   s      <<%**=@@O	ll2UZZ8;;FMMJ-,4,D&8r   c                    t        |      dk(  st        |      dk(  rt        d      t        | j                        }| xj                  |z  c_        | j                  j	                  |t        | j                        f       | j
                  j	                  |       t        | j                        }t        j                  | j                  |f      | _        |r:t        j                  | j                  t        j                  |      f      | _	        n9t        j                  | j                  t        j                  |      f      | _	        | j                  j	                  |t        | j                        f       y)a"  
        Append a new segment to the history.

        Args:
            text (`str`): The text of the new segment.
            tokens (`torch.LongTensor`): The tokens of the new segment.
            system (`bool`, *optional*): Whether the new segment is a system or user segment.
        r   z1Can't append empty text or token list to history.N)len
ValueErrorrD   r<   r%   r;   rE   r>   catrC   
zeros_like	ones_liker=   )r   rD   rE   r:   original_text_lengthoriginal_token_lengths         r   rM   zTextHistory.append_segmentX   s    t9>S[A-PQQ"499~		T	 4c$))nEF  ( #DKK 0iif 56$yy$*:*:E<L<LV<T)UVD$yy$*:*:EOOF<S)TUD!6DKK8H IJr   c                      d| _         || _        y)z0
        Mark the history as completed.
        TN)rF   rG   )r   rG   s     r   completezTextHistory.completet   s     "r   c                 D    | j                   d   \  }}| j                  || S )z,
        Get the last text segment.
        r   )r<   rD   )r   startends      r   last_text_segmentzTextHistory.last_text_segment{   s'    
 __R(
syys##r   c                     | j                   d   d   }| j                  d| }| j                  |d }| j                  |d }|||fS )zB
        Split the tokens into query and response tokens.
        r   r   N)r=   rE   rC   )r   split_indexqueryresponsemasks        r   split_query_response_tokensz'TextHistory.split_query_response_tokens   sX     &&q)!,L[);;{|,-h$$r   c                 B   t               st        d      t        | j                        }|j	                  | j
                  | j                  d   d   | j                  d   d          t        | j                  dd       D ]U  \  }\  }}| j                  |dz      r|j	                  | j                  ||       9|j	                  | j                  ||       W |j                  d| j                   | j                         t        |       |r| j                          yy)z)
        Print the text history.
        zdThe `rich` library is required to display text with formatting. Install it using `pip install rich`.r   r   N


Reward: style)r   ImportErrorr	   rD   stylizerI   r<   r#   r;   rJ   rK   r%   rH   rL   r   show_colour_legend)r   show_legendrD   r-   rY   rZ   s         r   	show_textzTextHistory.show_text   s     !"v  DIIT&&(:1(=tq?QRS?TU()<= 	;OA|s  Q'T..s;T--uc:		; 	l4;;-08I8IJd##% r   c                 8   t               st        d      t               }| j                  d   d   }t	        t        | j                  | j                              D ]  \  }\  }}||k  rL|j                  |j                  |j                               | j                         |j                  d       Z|dk(  rL|j                  |j                  |j                               | j                         |j                  d       |j                  |j                  |j                               | j                         |j                  d        |j                  d| j                   | j                         t!        |       |r| j#                          yy)z+
        Print the history tokens.
        zfThe `rich` library is required to display tokens with formatting. Install it using `pip install rich`.r   r   rd    rc   N)r   rf   r	   r=   r#   ziprE   rC   r%   convert_ids_to_tokensitemrI   rJ   rK   rH   rL   r   rh   )r   r   ri   rD   
prompt_endr-   tokenr`   s           r   show_tokenszTextHistory.show_tokens   sP    !"7 
 v%%a(+
 )#dkk4;K;K*L M 		!A}t:~I;;EJJLIQUQbQbcC I;;EJJLIQUQbQbcC I;;EJJLIQUQaQabC 		! 	l4;;-08I8IJd##% r   c                    t               st        d      t        d      }|j                  d| j                         |j                  d       |j                  d| j
                         |j                  d       |j                  d| j                         |j                  d       |j                  d| j                         |j                  d	       t        |       y
)z*
        Print the colour legend.
        znThe `rich` library is required to display colour legends with formatting. Install it using `pip install rich`.z

(Colour Legend: Promptrd   |SystemModelReward)N)	r   rf   r	   r%   rI   rJ   rK   rL   r   )r   rD   s     r   rh   zTextHistory.show_colour_legend   s     !"7  *+HD$5$56CHD$5$56CG4#3#34CHD$5$56Cdr   NT)F)r0   r1   r2   r3   r   rM   rW   propertyr[   ra   rj   rr   rh   r   r   r   r5   r5   ;   s=    |94K8# $ $	%&.&6r   r5   c                   t    e Zd ZdZ	 	 	 	 	 	 	 	 	 ddZd Zd Zd Zd Zd Z	dd	Z
dd
Z	 	 ddedee   fdZy)TextEnvironmentz[
    The TextEnvironment enables interaction of a LLM with an environment using tools.
    Nc
                 $   t        j                  dt               || _        || _        || _        t        |t              r|| _        n*|D 
ci c]  }
|
j                  j                  |
 c}
| _        || _        || _        d| _        d| _        d| _        d| _        || _        || _        |	t               | _        n|	| _        t)        | j                  d      | _        t-        | j                        j.                  j0                  | _        yc c}
w )a  
        Initialize TextEnvironment.

        Args:
            model (`PreTrainedModelWrapper`):
                The model to use for generation.
            tokenizer (`transformers.PreTrainedTokenizer`):
                The tokenizer to use for generation.
            tools (list):
                A list of tools to use for interaction.
            reward_fn (function):
                A function that takes a string and returns a reward.
            prompt (str):
                The base prompt to use for generation. Is prepended to the tasks.
            max_turns (Optional[int]):
                The maximum number of turns to allow.
            max_tool_response (Optional[int]):
                The maximum number of characters to allow in a tool response.
            max_length (Optional[int]):
                The maximum number of tokens to allow in an episode.
            generation_kwargs (Optional[dict]):
                A dictionary of keyword arguments to pass to the model's generate method.
        zThis class is deprecated and will be removed in version 0.21.0. To enable tool use with LLMs, check out smolagents (https://huggingface.co/docs/smolagents/index)z	<request>z<call>z
<response>z<submit>Nis_encoder_decoder)warningswarnDeprecationWarningmodelr   prompt
isinstancedicttools	__class__r0   	reward_fn
max_lengthrequest_token
call_tokenresponse_tokensubmit_token	max_turnsmax_tool_responsegeneration_kwargshasattrr   r   pretrained_modelrB   current_device)r   r   r   r   r   r   r   r   r   r   tools              r   r   zTextEnvironment.__init__   s    F 	 p	
 
"eT"DJDIJD$..1147JDJ"$("*&"!2$%)VD"%6D"")$**6J"K9$**EVV]]! Ks   Dc           
         d}|D cg c]  }| j                   |z    }}|D cg c]O  }| j                  |d      j                  d   j                  | j                  j
                  j                        Q }}t        ||      D cg c]  \  }}t        ||d       }	}}t        d |	D              r|| j                  k  r| j                  |	      }	| j                  |	      }	t        t        |	            D ]  }
| j                  |	|
         |	|
<    | j                  |	d      }	|d	z  }t        d |	D              r|| j                  k  r | j                   |	fi | t#        t$        t        |	D cg c]  }|j'                          c}       \  }}}|	D cg c]  }|j(                   }}|||||	fS c c}w c c}w c c}}w c c}w c c}w )
z
        Run the environment on a list of queries.

        Args:
            queries (list[str]): A list of queries to run the model in the environment on.
        r   ptreturn_tensorsTr9   c              3   6   K   | ]  }|j                      y wr   )rF   )r   historys     r   r   z&TextEnvironment.run.<locals>.<genexpr>*  s     AGg'''As   F
model_turnr   )r   r   r'   rA   r   r   rB   rm   r5   r$   r   generatetasks_end_checkr   rO   stepcompute_rewardmaplistra   rH   )r   queriesrewards_kwargsturnstaskr^   queries_tokensqqt	historiesr-   r   	responsesmasksrewardss                  r   runzTextEnvironment.run  s    29:$4;;%:: !
 NN5N6@@CFFtzzGbGbGiGij
 

 CFg~B^_B[Bt4_	_AyAAednnF\i0I,,Y7I3y>* 7#yy16	!7,,Y5,IIQJE AyAAednnF\ 	I88 %(cir3s^eG4W4W4Y3s.t$u!E1:;g7>>;;	5'9<<- ;

 ` 4t;s   F9AF>G6G	Gc                    | j                  |      \  }}|r|j                  |       |j                  r|S | j                  |j                        \  }}||d|j                   }n*|| j
                  vrd| d}	  | j
                  |   |      }t        |      | j                  kD  r|d| j                  dz
   dz   }|j                  || j                  z   | j                  || j                  z   d	
      j                  d   j                  | j                  j                   j"                        d       |S # t        $ r}dt        |       }Y d}~d}~ww xY w)z
        Step the environment forward one turn.

        Args:
            history (`TextHistory`): The history to step forward.
        rG   NzUnknown tool call: zUnknown tool .zTool error:    z...r   r   r   Tr9   )task_end_checkrW   rF   parse_tool_callr[   r   	ExceptionstrrO   r   rM   r   r   r'   rA   r   r   rB   )r   r   rG   endedr   r^   r_   errors           r   r   zTextEnvironment.step:  sd     ..w7	5y1N**7+D+DEe<5=,W-F-F,GHH4::%*4&27+4::d+E2 x=4111 >4#9#9A#=?%GHt***NN8d&9&99$NOYqR

++223 	 	
   7)#e*67s   >D; ;	EEEc                 J   t        j                  d| j                   d| j                   d|t         j                        }|y|j                         }t        j                  d|      }|y|j                  d      }dj                  |j                  d      dd       }||fS )	zX
        Parse request string. Expected format: <request><tool_name>query<call>
        z(?<=z).*?(?=ry   N)NNz<(.*?)>r   >)researchr   r   DOTALLgroupjoinsplit)r   rD   resultextracted_textr   r^   s         r   r   zTextEnvironment.parse_tool_call_  s     T$"4"4!5WT__<MQOQUWYW`W`a >#\\^N:~6 ><<?D --c21267U{r   c                      | j                   |D cg c]  }|j                   c}fi |}t        ||      D ]  \  }}||_         |S c c}w )z=
        Compute the reward for a list of histories.
        )r   r[   rm   rH   )r   r   reward_kwargsr   r   rH   s         r   r   zTextEnvironment.compute_rewardx  sX     !$..9!U'";";!UgYfg"9g6 	$OGV#GN	$ "Vs   A	c                 Z   t        |      D cg c]  \  }}|j                  r| }}}|D cg c]  }||   j                   }}| j                  |      }| j                  j                  |      }t        |||      D ]  \  }}}	||   j                  ||	d        |S c c}}w c c}w )z=
        Generate responses for a list of histories.
        Fr9   )r#   rF   rE   _generate_batchedr   r"   rm   rM   )
r   r   r-   r   active_historiesquery_tensorsresponse_tensorsresponse_textsresponse_textresponse_tensors
             r   r   zTextEnvironment.generate  s     1:)0D^*!WGL]L]A^^6FG1,,GG11-@445EF145E~Wg1h 	V-A}oaL''u'U	V  _Gs   B"B"B(c                     |D ]:  }|j                   r| j                  ||      \  }}|s)|j                  |       < |S )zJ
        Check if the current generation sequences have finished.
        r   r   )rF   r   rW   )r   r   r   r   rG   r   s         r   r   zTextEnvironment.tasks_end_check  sR     ! 	:G$$#'#6#6w:#6#V 	5$$y$9		:
 r   c                    d}d}|j                   r||fS | j                  Ft        | j                  |j                        j
                  d         | j                  kD  rd}d}||fS | j                  j                  |j                  v rd}||fS |rN| j                  |j                  v r| j                  |j                  v s| j                  |j                  v sd}||fS | j                  |j                  v rd}||fS )zH
        Check if the current generation sequence has finished.
        Fr   T)rF   r   rO   r   rD   r'   	eos_tokenr   r[   r   r   )r   r   r   rG   r   s        r   r   zTextEnvironment.task_end_check  s    	e##??&3t~~gll/K/U/UVW/X+Y\`\k\k+kIE % ^^%%5E % 7#<#<<T[TmTmAm  G$=$==E % '";";;E%r   
batch_sizepad_to_multiple_ofc                    g }| j                   j                  }| j                  sd| j                   _        t        t	        |      |      }t        dt	        |      |      D ]q  }t        t	        |      ||z         }||| }|D 	cg c]  }	t        j                  |	       }
}	||
d}| j                   j                  |dd|d      j                  | j                        }t        | j                  | j                  g| j                         }t        |g      | j                  d<    t!        | j"                        j$                  di || j                  }t'        ||d	   |j(                        D ]^  \  }}}| j                  s|d
|z
  j+                         d }n|}| j                  s||j+                         d }|j-                  |d|        ` t || j                   _        |S c c}	w )aG  
        Generate responses for a list of query tensors.

        Args:
            query_tensors (list[torch.Tensor]): A list of query tensors to generate responses for.
            batch_size (int): The batch size to use for generation.
            pad_to_multiple_of (int): The padding length to use for generation.
        leftr   )r'   attention_maskTNr   )paddingr   r   r   stopping_criteriar   r   r   )r   padding_sider   minrO   r   r>   rS   padrA   r   r   r   r   r   r   r   r   r   rm   r    sumr%   )r   r   r   r   outputspadding_side_defaultr-   	end_indexbatchelement
batch_maskinputspadded_inputsr   generations
generationr`   r    outputs                      r   r   z!TextEnvironment._generate_batched  s    #~~::&&*0DNN' ]+Z8
q#m,j9 "	:AC.J?I!!I.EBGHw%//'2HJH#(JGF NN..#5# /  b$$%  !7IZIZ7[]a]k]k l:NPaOb:cD""#67J5djjAJJu]u^b^t^tuK69]+;<>O>`>`7 :2
D"2 ..'T(8(:;F'F..#TJJLN3F v&7'789:-"	:F ';#? Is   G))	NNNNN   d   NNrz   )   N)r0   r1   r2   r3   r   r   r   r   r   r   r   r   intr   r   r   r   r   r}   r}      su     =^~=B#J2	 2 ,0	: : %SM	:r   r}   )r   r   typingr   r>   accelerate.utilsr   transformersr   r   transformers.utilsr   richr   	rich.textr	   r   r5   r}   r   r   r   <module>r      sP    
    8 ? 0 - :W WtV Vr   