
import collections
import platform
import re
import socket
from codecs import encode  # noqa: F401
from collections import OrderedDict
from functools import partial, reduce
from types import MethodType

import numpy as np
import torch
from packaging.version import Version
from safetensors.torch import save_file as safe_save_file

from ..commands.config.default import write_basic_config  # noqa: F401
from ..logging import get_logger
from ..state import PartialState
from .constants import FSDP_PYTORCH_VERSION
from .dataclasses import DistributedType
from .imports import (
    is_deepspeed_available,
    is_numpy_available,
    is_torch_distributed_available,
    is_torch_xla_available,
    is_weights_only_available,
)
from .modeling import id_tensor_storage
from .transformer_engine import convert_model
from .versions import is_torch_version


logger = get_logger(__name__)


if is_torch_xla_available():
    import torch_xla.core.xla_model as xm


def is_compiled_module(module: torch.nn.Module) -> bool:
    """
    Check whether the module was compiled with torch.compile()
    """
    _dynamoF)hasattrtorch
isinstancer   
eval_frameOptimizedModuler   s    Q/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/accelerate/utils/other.pyis_compiled_moduler$   5   s.     5)$femm66FFGG    c                     t        t        d      sy| j                  rE| j                         D ]2  }t	        |t        j
                  j                  j                        s2 y y)z\
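

# Quick sanity sketch (not executed on import; assumes a PyTorch build with `torch.compile`):
#
#     compiled = torch.compile(torch.nn.Linear(4, 4))
#     assert is_compiled_module(compiled)
#     assert not is_compiled_module(torch.nn.Linear(4, 4))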


def has_compiled_regions(module: torch.nn.Module) -> bool:
    """
    Check whether the module has submodules that were compiled with `torch.compile()`.
    """
    if not hasattr(torch, "_dynamo"):
        return False

    if module._modules:
        for submodule in module.modules():
            if isinstance(submodule, torch._dynamo.eval_frame.OptimizedModule):
                return True

    return False


def is_repeated_blocks(module: torch.nn.Module) -> bool:
    """
    Check whether the module is a repeated block, i.e. `torch.nn.ModuleList` with all children of the same class. This
    is useful to determine whether we should apply regional compilation to the module.
    """
    return isinstance(module, torch.nn.ModuleList) and all(isinstance(m, module[0].__class__) for m in module)
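

# Usage sketch (not executed on import): a homogeneous `nn.ModuleList` is a repeated block,
# a heterogeneous one is not.
#
#     blocks = torch.nn.ModuleList([torch.nn.Linear(8, 8) for _ in range(4)])
#     assert is_repeated_blocks(blocks)
#     mixed = torch.nn.ModuleList([torch.nn.Linear(8, 8), torch.nn.ReLU()])
#     assert not is_repeated_blocks(mixed)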


def has_repeated_blocks(module: torch.nn.Module) -> bool:
    """
    Check whether the module has repeated blocks, i.e. `torch.nn.ModuleList` with all children of the same class, at
    any level of the module hierarchy. This is useful to determine whether we should apply regional compilation to the
    module.
    """
    if module._modules:
        for submodule in module.modules():
            if is_repeated_blocks(submodule):
                return True

    return False
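

# Sketch of the distinction (`Tiny` is a hypothetical container, not executed on import):
# the `ModuleList` itself *is* a repeated block, while a model containing one only *has*
# repeated blocks.
#
#     class Tiny(torch.nn.Module):
#         def __init__(self):
#             super().__init__()
#             self.h = torch.nn.ModuleList([torch.nn.Linear(8, 8) for _ in range(2)])
#
#     assert has_repeated_blocks(Tiny()) and not is_repeated_blocks(Tiny())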


def compile_regions(module: torch.nn.Module, **compile_kwargs) -> torch.nn.Module:
    """
    Performs regional compilation where we target repeated blocks of the same class and compile them sequentially to
    hit the compiler's cache. For example, in `GPT2LMHeadModel`, the repeated block/class is `GPT2Block`, and can be
    accessed as `model.transformer.h[0]`. The rest of the model (e.g. model.lm_head) is compiled separately.

    This allows us to speed up the compilation overhead / cold start of models like LLMs and Transformers in general.
    See https://pytorch.org/tutorials/recipes/regional_compilation.html for more details.

    Args:
        module (`torch.nn.Module`):
            The model to compile.
        **compile_kwargs:
            Additional keyword arguments to pass to `torch.compile()`.

    Returns:
        `torch.nn.Module`: A new instance of the model with some compiled regions.

    Example:
    ```python
    >>> from accelerate.utils import compile_regions
    >>> from transformers import AutoModelForCausalLM

    >>> model = AutoModelForCausalLM.from_pretrained("gpt2")
    >>> compiled_model = compile_regions(model, mode="reduce-overhead")
    >>> compiled_model.transformer.h[0]
    OptimizedModule(
        (_orig_mod): GPT2Block(
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
                (c_attn): Conv1D(nf=2304, nx=768)
                (c_proj): Conv1D(nf=768, nx=768)
                (attn_dropout): Dropout(p=0.1, inplace=False)
                (resid_dropout): Dropout(p=0.1, inplace=False)
            )
            (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (mlp): GPT2MLP(
                (c_fc): Conv1D(nf=3072, nx=768)
                (c_proj): Conv1D(nf=768, nx=3072)
                (act): NewGELUActivation()
                (dropout): Dropout(p=0.1, inplace=False)
            )
        )
    )
    ```
    """

    def _compile_regions(module: torch.nn.Module, **compile_kwargs) -> torch.nn.Module:
        if is_repeated_blocks(module):
            # Compile each repeated block separately so subsequent blocks hit the compiler cache
            new_module = torch.nn.ModuleList()
            for submodule in module:
                new_module.append(torch.compile(submodule, **compile_kwargs))
        elif has_repeated_blocks(module):
            # Recreate the container and recurse into its children
            new_module = module.__class__.__new__(module.__class__)
            new_module.__dict__.update(module.__dict__)
            new_module._modules = {}
            for name, submodule in module.named_children():
                new_module.add_module(name, _compile_regions(submodule, **compile_kwargs))
        else:
            # Leaf case: compile the whole module at once
            new_module = torch.compile(module, **compile_kwargs)

        return new_module

    new_module = _compile_regions(module, **compile_kwargs)

    if "_orig_mod" not in new_module.__dict__:
        # Keep a reference to the original module so it can be unwrapped later
        new_module.__dict__["_orig_mod"] = module

    return new_module
K(r%   c                     t        |       r| D ]  } |j                  di |  yt        |       r"| j                         D ]  }t	        |fi |  y | j                  di | y)a  
    Performs regional compilation the same way as `compile_regions`, but specifically for `DeepSpeedEngine.module`.
    Since the model is wrapped in a `DeepSpeedEngine` and has many added hooks, offloaded parameters, etc. that
    `torch.compile(...)` interferes with, this version of regional compilation uses the inplace `module.compile()` method
    instead.

    Args:
        module (`torch.nn.Module`):
            The model to compile.
        **compile_kwargs:
            Additional keyword arguments to pass to `module.compile()`.
    """
    if is_repeated_blocks(module):
        for submodule in module:
            submodule.compile(**compile_kwargs)
    elif has_repeated_blocks(module):
        for child in module.children():
            compile_regions_deepspeed(child, **compile_kwargs)
    else:  # leaf node
        module.compile(**compile_kwargs)
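

# Hedged usage sketch (assumes `deepspeed` is installed; `engine` and `ds_config` are
# hypothetical names):
#
#     import deepspeed
#     engine, *_ = deepspeed.initialize(model=model, config=ds_config)
#     compile_regions_deepspeed(engine.module, mode="reduce-overhead")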


def model_has_dtensor(model: torch.nn.Module) -> bool:
    """
    Check if the model has DTensor parameters.

    Args:
        model (`torch.nn.Module`):
            The model to check.

    Returns:
        `bool`: Whether the model has DTensor parameters.
    """
    if is_torch_version(">=", "2.5.0"):
        from torch.distributed.tensor import DTensor
    else:
        from torch.distributed._tensor import DTensor

    return any(isinstance(p, DTensor) for p in model.parameters())


def extract_model_from_parallel(
    model, keep_fp32_wrapper: bool = True, keep_torch_compile: bool = True, recursive: bool = False
):
    """
    Extract a model from its distributed containers.

    Args:
        model (`torch.nn.Module`):
            The model to extract.
        keep_fp32_wrapper (`bool`, *optional*):
            Whether to keep the mixed precision hooks on the model (`False` removes them).
        keep_torch_compile (`bool`, *optional*):
            Whether to keep the `torch.compile()` wrapper (`False` unwraps the compiled model).
        recursive (`bool`, *optional*, defaults to `False`):
            Whether to recursively extract all cases of `module.module` from `model` as well as unwrap child sublayers
            recursively, not just the top-level distributed containers.

    Returns:
        `torch.nn.Module`: The extracted model.
    """
    options = (torch.nn.parallel.DistributedDataParallel, torch.nn.DataParallel)

    is_compiled = is_compiled_module(model)
    has_compiled = has_compiled_regions(model)

    if is_compiled:
        compiled_model = model
        model = model._orig_mod
    elif has_compiled:
        compiled_model = model
        model = model.__dict__["_orig_mod"]

    if is_deepspeed_available():
        from deepspeed import DeepSpeedEngine

        options += (DeepSpeedEngine,)

    if is_torch_version(">=", FSDP_PYTORCH_VERSION) and is_torch_distributed_available():
        from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel as FSDP

        options += (FSDP,)

    while isinstance(model, options):
        model = model.module

    if recursive:

        def _recursive_unwrap(module):
            # Wrapped modules conventionally expose the wrapped model as `.module`
            if hasattr(module, "module"):
                unwrapped_module = _recursive_unwrap(module.module)
            else:
                unwrapped_module = module
            # Unwrap child sublayers recursively as well
            for name, child in unwrapped_module.named_children():
                setattr(unwrapped_module, name, _recursive_unwrap(child))
            return unwrapped_module

        model = _recursive_unwrap(model)

    if not keep_fp32_wrapper:
        forward = model.forward
        original_forward = model.__dict__.pop("_original_forward", None)
        if original_forward is not None:
            while hasattr(forward, "__wrapped__"):
                forward = forward.__wrapped__
                if forward == original_forward:
                    break
            model.forward = MethodType(forward, model)
        if getattr(model, "_converted_to_transformer_engine", False):
            convert_model(model, to_transformer_engine=False)

    if keep_torch_compile:
        if is_compiled:
            compiled_model._orig_mod = model
            model = compiled_model
        elif has_compiled:
            compiled_model.__dict__["_orig_mod"] = model
            model = compiled_model

    return model
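

# Typical round trip (sketch; assumes a DDP-wrapped `model` under `torch.distributed`):
#
#     ddp_model = torch.nn.parallel.DistributedDataParallel(model)
#     unwrapped = extract_model_from_parallel(ddp_model)
#     assert unwrapped is model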


def wait_for_everyone():
    """
    Introduces a blocking point in the script, making sure all processes have reached this point before continuing.

    <Tip warning={true}>

    Make sure all processes will reach this instruction otherwise one of your processes will hang forever.

    </Tip>
    """
    PartialState().wait_for_everyone()
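

# Usage sketch in a training script (`prepare_artifacts` is hypothetical):
#
#     state = PartialState()
#     if state.is_main_process:
#         prepare_artifacts()   # work done on rank 0 only
#     wait_for_everyone()       # every other rank blocks here until rank 0 arrives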


def clean_state_dict_for_safetensors(state_dict: dict):
    """
    Cleans the state dictionary from a model and removes tensor aliasing if present.

    Args:
        state_dict (`dict`):
            The state dictionary from a model
    """
    ptrs = collections.defaultdict(list)
    # When bnb serialization is used, weights in the state dict can be strings
    for name, tensor in state_dict.items():
        if not isinstance(tensor, str):
            ptrs[id_tensor_storage(tensor)].append(name)

    # These are all pointers of tensors with shared memory
    shared_ptrs = {ptr: names for ptr, names in ptrs.items() if len(names) > 1}
    warn_names = set()
    for names in shared_ptrs.values():
        # Keep only the first key for each set of aliased tensors and drop the rest
        found_names = [name for name in names if name in state_dict]
        warn_names.update(found_names[1:])
        for name in found_names[1:]:
            del state_dict[name]
    if len(warn_names) > 0:
        logger.warning(
            f"Removed shared tensor {warn_names} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading",
        )
    state_dict = {k: v.contiguous() if isinstance(v, torch.Tensor) else v for k, v in state_dict.items()}
    return state_dict


def save(obj, f, save_on_each_node: bool = False, safe_serialization: bool = False):
    """
    Save the data to disk. Use in place of `torch.save()`.

    Args:
        obj:
            The data to save
        f:
            The file (or file-like object) to use to save the data
        save_on_each_node (`bool`, *optional*, defaults to `False`):
            Whether to only save on the global main process
        safe_serialization (`bool`, *optional*, defaults to `False`):
            Whether to save `obj` using `safetensors` or the traditional PyTorch way (that uses `pickle`).
    """
    # When TorchXLA is enabled, all data must be moved to the CPU before saving
    if PartialState().distributed_type == DistributedType.XLA:
        obj = xm._maybe_convert_to_cpu(obj)
    if safe_serialization:
        save_func = partial(safe_save_file, metadata={"format": "pt"})
        # If it is a model state dict, remove duplicate (aliased) tensors first
        if isinstance(obj, OrderedDict):
            obj = clean_state_dict_for_safetensors(obj)
    else:
        save_func = torch.save

    if PartialState().is_main_process and not save_on_each_node:
        save_func(obj, f)
    elif PartialState().is_local_main_process and save_on_each_node:
        save_func(obj, f)


# Globals considered safe to unpickle when `torch.load(..., weights_only=True)` is available:
# numpy scalars/arrays appear in saved RNG states, so they have to be allow-listed.
np_core = np._core if is_numpy_available("2.0.0") else np.core
TORCH_SAFE_GLOBALS = [
    # numpy arrays are just actual data
    np_core.multiarray._reconstruct,
    np.ndarray,
    # The following are needed for the RNG states
    np.dtype,
]

if is_numpy_available("1.25.0"):
    TORCH_SAFE_GLOBALS.append(np.dtypes.UInt32DType)


def load(f, map_location=None, **kwargs):
    """
    Compatible drop-in replacement of `torch.load()` which allows for `weights_only` to be used if `torch` version is
    2.4.0 or higher. Otherwise will ignore the kwarg.

    Will also add (and then remove) an exception for numpy arrays

    Args:
        f:
            The file (or file-like object) to use to load the data
        map_location:
            a function, `torch.device`, string or a dict specifying how to remap storage locations
        **kwargs:
            Additional keyword arguments to pass to `torch.load()`.
    """
    try:
        if is_weights_only_available():
            old_safe_globals = torch.serialization.get_safe_globals()
            if "weights_only" not in kwargs:
                kwargs["weights_only"] = True
            torch.serialization.add_safe_globals(TORCH_SAFE_GLOBALS)
        else:
            kwargs.pop("weights_only", None)

        loaded_obj = torch.load(f, map_location=map_location, **kwargs)
    finally:
        if is_weights_only_available():
            torch.serialization.clear_safe_globals()
            torch.serialization.add_safe_globals(old_safe_globals)
    return loaded_obj


def get_pretty_name(obj):
    """
    Gets a pretty name from `obj`.
    """
    if not hasattr(obj, "__qualname__") and not hasattr(obj, "__name__"):
        obj = getattr(obj, "__class__", obj)
    if hasattr(obj, "__qualname__"):
        return obj.__qualname__
    if hasattr(obj, "__name__"):
        return obj.__name__
    return str(obj)
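

# Behaviour sketch:
#
#     get_pretty_name(torch.nn.Linear)        # "Linear" (class __qualname__)
#     get_pretty_name(torch.nn.Linear(2, 2))  # "Linear" (falls back to the instance's class)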


def merge_dicts(source, destination):
    """
    Recursively merges two dictionaries.

    Args:
        source (`dict`): The dictionary to merge into `destination`.
        destination (`dict`): The dictionary to merge `source` into.
    """
    for key, value in source.items():
        if isinstance(value, dict):
            node = destination.setdefault(key, {})
            merge_dicts(value, node)
        else:
            destination[key] = value

    return destination
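

# Merge sketch: nested keys are merged rather than overwritten wholesale.
#
#     merge_dicts({"a": {"x": 1}}, {"a": {"y": 2}, "b": 3})
#     # -> {"a": {"y": 2, "x": 1}, "b": 3}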


def is_port_in_use(port: int = None) -> bool:
    """
    Checks if a port is in use on `localhost`. Useful for checking if multiple `accelerate launch` commands have been
    run and need to see if the port is already in use.
    """
    if port is None:
        port = 29500
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(("localhost", port)) == 0


def get_free_port():
    """
    Gets a free port on `localhost`. Useful for automatic port selection when port 0 is specified in distributed
    training scenarios.

    Returns:
        int: An available port number
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("localhost", 0))
        return s.getsockname()[1]


def convert_bytes(size):
    "Converts `size` from bytes to the largest possible unit"
    for x in ["bytes", "KB", "MB", "GB", "TB"]:
        if size < 1024.0:
            return f"{round(size, 2)} {x}"
        size /= 1024.0

    return f"{round(size, 2)} PB"


def check_os_kernel():
    """Warns if the kernel version is below the recommended minimum on Linux."""
    info = platform.uname()
    system = info.system
    if system != "Linux":
        return

    _, version, *_ = re.split(r"(\d+\.\d+\.\d+)", info.release)
    min_version = "5.5.0"
    if Version(version) < Version(min_version):
        msg = (
            f"Detected kernel version {version}, which is below the recommended minimum of {min_version}; this can "
            "cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher."
        )
        logger.warning(msg, main_process_only=True)
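

# Arithmetic sketch for `convert_bytes`:
#
#     convert_bytes(1024)       # "1.0 KB"
#     convert_bytes(1_500_000)  # "1.43 MB"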


def recursive_getattr(obj, attr: str):
    """
    Recursive `getattr`.

    Args:
        obj:
            A class instance holding the attribute.
        attr (`str`):
            The attribute that is to be retrieved, e.g. 'attribute1.attribute2'.
    """

    def _getattr(obj, attr):
        return getattr(obj, attr)

    return reduce(_getattr, [obj] + attr.split("."))


def get_module_children_bottom_up(model: torch.nn.Module, return_fqns: bool = False) -> list[torch.nn.Module]:
    """Traverse the model in bottom-up order and return the children modules in that order.

    Args:
        model (`torch.nn.Module`): the model to get the children of

    Returns:
        `list[torch.nn.Module]`: a list of children modules of `model` in bottom-up order. The last element is the
        `model` itself.
    """
    top = model if not return_fqns else ("", model)
    stack = [top]
    ordered_modules = []
    while stack:
        current_module = stack.pop()
        if return_fqns:
            current_module_name, current_module = current_module
        for name, child in current_module.named_children():
            if return_fqns:
                child_name = current_module_name + "." + name if current_module_name else name
                stack.append((child_name, child))
            else:
                stack.append(child)
        if return_fqns:
            ordered_modules.append((current_module_name, current_module))
        else:
            ordered_modules.append(current_module)

    return ordered_modules[::-1]