
    biZ                     v   d dl Z d dlZd dlmZ d dlmZ d dlmZmZ d dl	Z
d dlZd dlmZmZ ddededefd	Zdd
ej$                  dej$                  dee   dej$                  fdZdd
ej$                  dej$                  dedej$                  fdZdd
ej$                  dej$                  dedej$                  fdZ G d d      Z G d d      Z	 	 	 	 ddeeef   deeeej6                     ej6                  f      deej8                     deej:                     deej<                     dej$                  fdZy)     N)Mapping)contextmanager)OptionalUnion)is_torch_npu_availableis_torch_xpu_availablenestedsepreturnc                 X    dt         dt        dt         ddffdi } | d|       |S )z>Flatten dictionary and concatenate nested keys with separator.nestprefixintor   Nc                     | j                         D ]D  \  }}|v rt        d d| d      t        |t              r |||z   z   |       =||||z   <   F y )Nzseparator 'z' not allowed to be in key '')items
ValueError
isinstancer   )r   r   r   kvrecurser
   s        C/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/core.pyr   zflatten_dict.<locals>.recurse   sm    JJL 	%DAqax ;se3OPQsRS!TUU!W%6A:+T2#$VaZ 	%     )dictstr)r	   r
   flatr   s    ` @r   flatten_dictr      s<    %d %C %t % % DFBKr   valuesmaskaxisc                     |(| |z  j                  |      |j                  |      z  S | |z  j                         |j                         z  S )z,Compute mean of tensor with a masked values.)r!   )sum)r   r    r!   s      r   masked_meanr$   +   sN    """-d0CCC""$txxz11r   unbiasedc                     t        | |      }| |z
  }t        |dz  |      }|r-|j                         }|dk(  rt        d      ||dz
  z  }||z  }|S )z.Compute variance of tensor with masked values.   r   zThe sum of the mask is zero, which can happen when `mini_batch_size=1`;try increase the `mini_batch_size` or `gradient_accumulation_steps`   )r$   r#   r   )r   r    r%   meancentered_valuesvariancemask_sumbessel_corrections           r   
masked_varr.   3   sq    vt$DtmO?A-t4H88:q=V  %15//Or   
shift_meanc                     t        | |      t        | |      }}| |z
  t        j                  |dz         z  }|s||z  }|S )z!Whiten values with masked values.g:0yE>)r$   r.   torchrsqrt)r   r    r/   r)   varwhiteneds         r   masked_whitenr5   F   sF    FD):fd+C#DS4Z!88HDOr   c                   ,    e Zd ZdZdedefdZdefdZy)LengthSamplerz
    Samples a length
    	min_value	max_valuec                 8    t        t        ||            | _        y N)listranger   )selfr8   r9   s      r   __init__zLengthSampler.__init__T   s    5I67r   r   c                 T    t         j                  j                  | j                        S r;   )nprandomchoicer   )r>   s    r   __call__zLengthSampler.__call__W   s    yy,,r   N)__name__
__module____qualname____doc__intr?   rD    r   r   r7   r7   O   s&    8# 8# 8-# -r   r7   c                   *    e Zd ZdZeed               Zy)PPODecoratorsFc              #   >  K   d  | j                   rt               rGt        j                          t        j
                  j                          t        j                          y t               rGt        j                          t        j                  j                          t        j                          y t        j                  j                         rGt        j                          t        j                  j                          t        j                          y y y wr;   )optimize_device_cacher   gccollectr1   xpuempty_cacher   npucudais_available)clss    r   empty_device_cachez PPODecorators.empty_device_cache^   s      	$$%'

		%%'

')

		%%'

((*



&&(

 + %s   DDN)rE   rF   rG   rN   classmethodr   rW   rJ   r   r   rL   rL   [   s"    !  r   rL   shape	generatordevicedtypelayoutc           
      D   |}| d   }|xs t         j                  }|xs t        j                  d      }|t        |t              s|j                  j
                  n|d   j                  j
                  }||j
                  k7  r1|dk(  r,d}|dk7  rKt        j                  d| d| d| dt               n&||j
                  k7  r|d	k(  rt        d
| d| d      t        |t              rt        |      dk(  r|d   }t        |t              rcd| dd z   } t        |      D cg c]  }t        j                  | ||   |||      ! }	}t        j                  |	d      j                  |      }	|	S t        j                  | ||||      j                  |      }	|	S c c}w )zA helper function to create random tensors on the desired `device` with the desired `dtype`. When
    passing a list of generators, you can seed each batch size individually. If CPU generators are passed, the tensor
    is always created on the CPU.
    r   cpuNmpszBThe passed generator was created on 'cpu' even though a tensor on zB was expected. Tensors will be created on 'cpu' and then moved to zk. Note that one can probably slighly speed up this function by passing a generator that was created on the z device.rT   zCannot generate a z! tensor from a generator of type .r(   )r(   )rZ   r[   r\   r]   )dim)r1   stridedr[   r   r<   typewarningswarnUserWarningr   lenr=   randncatto)
rY   rZ   r[   r\   r]   rand_device
batch_sizegen_device_typeilatentss
             r   randn_tensorrq   q   s    KqJ$u}}F*u||E*F7A)T7R)**//XabcXdXkXkXpXpfkk)o.FKXY_X` aKKQ( Sfflemmuw  	 +60I1&9Z[jZkklmnn )T"s9~':aL	)T"uQRy  :&
 KK1kQV_ef
 
 ))G+..v6 N ++eyTYbhillmstN
s   $$F)/r;   )T)NNNN) rO   re   collections.abcr   
contextlibr   typingr   r   numpyrA   r1   transformersr   r   r   r   r   Tensorboolr$   r.   r5   r7   rL   tupler<   	Generatorr[   r\   r]   rq   rJ   r   r   <module>r|      sr   
  # % "   G C $ "2 2ELL 2 2Z_ZfZf 2u|| 5<< 4 SXS_S_ &%,, ell  X]XdXd 	- 	- 0 JN%)#'%)..d5??3U__DEF. U\\". EKK 	.
 U\\". \\.r   