
    bi              	       R   d Z ddlmZ ddlmZmZmZ ddlZddlm	Z	m
Z
 er e	       rddlmZ dej                  j                  fd	Zd
edee   defdZd
edefdZe
dej                  j                  fd       Ze
defdej                  j                  ded   dee   fd       Zy)z,
Needed utilities for torchao FP8 training.
    )partial)TYPE_CHECKINGCallableOptionalN   )is_torchao_availabletorchao_required)Float8LinearConfigmodelc                     d\  }}| j                         D ]0  \  }}t        |t        j                  j                        s+||}|}2 ||fS )z
    Finds the first and last linear layer names in a model.

    This is needed during FP8 to avoid issues with instability by keeping the first and last layers unquantized.

    Ref: https://x.com/xariusrke/status/1826669142604141052
    )NN)named_modules
isinstancetorchnnLinear)r   first_linearlast_linearnamemodules        N/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/accelerate/utils/ao.pyfind_first_last_linear_layersr       sY     !+L+++- ffehhoo.##K	
 $$    fqnlayers_to_filterreturnc                     t        | t        j                  j                        r%| j                  dz  dk7  s| j
                  dz  dk7  ry||v ryy)a  
    A function which will check if `module` is:
    - a `torch.nn.Linear` layer
    - has in_features and out_features divisible by 16
    - is not part of `layers_to_filter`

    Args:
        module (`torch.nn.Module`):
            The module to check.
        fqn (`str`):
            The fully qualified name of the layer.
        layers_to_filter (`List[str]`):
            The list of layers to filter.
       r   FT)r   r   r   r   in_featuresout_features)r   r   r   s      r   filter_linear_layersr    1   sM     &%((//*"a'6+>+>+Cq+H
r   c                 >    t        |       \  }}t        | |||g      S )a  
    A filter function which will filter out all linear layers except the first and last.

    <Tip>

        For stability reasons, we skip the first and last linear layers Otherwise can lead to the model not training or
        converging properly

    </Tip>

    Args:
        module (`torch.nn.Module`):
            The module to check.
        fqn (`str`):
            The fully qualified name of the layer.
    r   )r   r    )r   r   r   r   s       r   #filter_first_and_last_linear_layersr#   H   s'    " !>f EL+|[>YZZr   c                 \    ddl m} | j                         D ]  \  }}t        ||      s y y)Nr   )Float8LinearTF)torchao.float8.float8_linearr%   r   r   )r   r%   r   r   s       r   has_ao_layersr'   ]   s3    9++- ffl+ r   configr
   module_filter_funcc                 l    ddl m} t        |       \  }}|t        t        ||g      } || ||       y)a
  
    Converts all `nn.Linear` layers in the model (except the first and last) to torchao's `Float8Linear` layer inplace.

    Args:
        model (`torch.nn.Module`):
            The model to convert.
        config (`torchao.float8.Float8LinearConfig`, *optional*):
            The configuration for the FP8 training. Recommended to utilize
            `torchao.float8.recipe_name_to_linear_config` to generate this. In general, the default config should be
            sufficient (what is passed when set to `None`).
        module_filter_func (`Callable`, *optional*, defaults to `filter_linear_layers`):
            Optional function that must take in a module and layer name, and returns a boolean indicating whether the
            module should be converted to FP8. Defaults to `filter_linear_layers`. See it for an example.

    Example:

    ```python
    from accelerate.utils.ao import convert_model_to_fp8_ao

    model = MyModel()
    model.to("cuda")
    convert_to_float8_training(model)

    model.train()
    ```
    r   )convert_to_float8_trainingNr"   )module_filter_fnr(   )torchao.float8r+   r   r   r    )r   r(   r)   r+   r   r   s         r   convert_model_to_fp8_aor.   g   s?    @ : =e DL+!$%9\[fLghu7IRXYr   )__doc__	functoolsr   typingr   r   r   r   importsr   r	   r&   r
   r   Moduler   strlistboolr    r#   r'   r.    r   r   <module>r8      s     4 4  ; C% %"c T#Y 4 .[S [T [*     .2-P$Z88??$Z)*$Z !*$Z $Zr   