
    bi                     8    d dl Z deeeeef         dee   fdZy)    Ncompletionsreturnc           	          d}| D cg c]
  }|d   d    }}|D cg c]8  }t        j                  ||t         j                  t         j                  z        : }}|D cg c]  }|rdnd
 c}S c c}w c c}w c c}w )a  
    Reward function that checks if the reasoning process is enclosed within `"<think>"` and `"</think>"` tags. The
    function returns a reward of 1.0 if the format is correct, otherwise 0.0.

    Args:
        completions (`list[list[dict[str, str]]]`):
            List of completions to be evaluated. Each completion must be a list of one message, i.e. a dictionary
            containing the key `"content"` with the value being the text of the completion.
        **kwargs:
            Additional keyword arguments. This function does not use them, but they are required in the function
            signature to ensure compatibility with trainers like [`GRPOTrainer`].

    Returns:
        `list[float]`:
            A list of rewards, where each reward is 1.0 if the completion matches the expected format, otherwise 0.0.

    Example:
    ```python
    >>> from trl.rewards import think_format_reward

    >>> completions = [
    ...     [{"content": "<think>\nThis is my reasoning.\n</think>\nThis is my answer."}],
    ...     [{"content": "<think>\nThis is my reasoning.\nThis is my answer."}],
    ... ]
    >>> think_format_reward(completions)
    [1.0, 0.0]
    ```
    z%^<think>(?!.*<think>)(.*?)</think>.*$r   contentg      ?g        )rematchDOTALL	MULTILINE)r   kwargspattern
completioncompletion_contentsr   matchesr   s           U/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/rewards/format_rewards.pythink_format_rewardr      sw    : 7GFQR
:a=3RRSfgrxx"))bll*BCgGg/67e5Cc!77 Sg7s   A/=A4A9)r   listdictstrfloatr        r   <module>r      s4    
 8T$tCH~*>%?  8dSXk  8r   