
    bixD                    t   d dl mZ d dlZd dlZd dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZmZ  G d d      ZddZ ej:                         ddd       Z ej:                         dd       Z G d d      Z  ej:                         	 	 	 d	 	 	 	 	 dd       Z!y)    )annotationsN)CallableOptionalUnion)clear_device_cache)snapshot_download)HFValidationErrorLocalEntryNotFoundError)SafetensorError	safe_open)cached_file)get_checkpoint_shard_files)is_bnb_4bit_availableis_bnb_availableis_xpu_availablec                  \     e Zd Zd fd	Zed	d       Zed
d       Zd Zd Zd Z	d Z
 xZS )NFQuantizerc                   t        |   |i | || _        || _        || _        || _        | j                  dk(  rB| j                  | j                        | _        | j                  j                  |      | _        y | j                  dk(  rB| j                  | j                        | _        | j                  j                  |      | _        y t        d      )Nnormal)num_bitsuniformz-Other quantization methods not supported yet.)super__init__r   devicemethod
block_sizecreate_normal_mapnorm_lookup_tabletocreate_uniform_mapNotImplementedError)selfr   r   r   r   argskwargs	__class__s          Q/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/peft/utils/loftq_utils.pyr   zNFQuantizer.__init__$   s    $)&) $;;("%)%;%;T]]%;%SD"%)%;%;%>%>v%FD"[[I%%)%<%<dmm%<%TD"%)%;%;%>%>v%FD"%&UVV    c                    | rVt        j                  ddd|dz
  z        }t        j                  ddd|dz
  z        }t        j                  ||dd  g      }|S t        j                  ddd|z        }|S )Nr         )torchlinspacecat)	symmetricr   negativepositivetables        r&   r    zNFQuantizer.create_uniform_map3   sx    ~~b!Q8a<-@AH~~aA(Q,,?@HIIx!"67E  NN2q!X+6Er'   c                   	 ddl m} d|z  }|r~|j                  t	        j
                  d| z
  | |dz               j                         }g }t        t        |      dz
        D ]%  }|j                  d||   z  d||dz      z  z          ' |}n|j                  t	        j
                  | d|dz  dz         d d       j                         }dg}	|j                  t	        j
                  | d|dz        d d        j                         }
||	z   |
z   }t	        j                  |      }|j                         j                  }||j                         z  }|S # t        $ r t        d      w xY w)Nr   )normzMThe required package 'scipy' is not installed. Please install it to continue.r*   r+   g      ?r)   )scipy.statsr4   ImportErrorppfr,   r-   tolistrangelenappendTensorsortvaluesmax)offsetr/   r   r4   
variationsvr>   indexv1v2v3s              r&   r   zNFQuantizer.create_normal_map?   sa   	o( [
F
FJNKLSSUAFs1vz* CcAeHnsQuqy\/AABCA %..jAo6IJ3BOPWWYBB88ENN63
aH"MNNVVXBR"Aa%%&**,)  	omnn	os   E E)c                8   t        j                  |      j                         }||z  }|j                  d      }t        j                  | j
                        j                  dd      }t        j                  ||z
        }t        j                  |d      }||fS )Nr)   r+   dim)r,   absr?   	unsqueezetensorr   reshapeargmin)r"   weightmax_absweight_normedweight_normed_expanded
L_reshapedabs_diffqweights           r&   quantize_tensorzNFQuantizer.quantize_tensorY   s    ))F#'')(!.!8!8!< \\$"8"89AA!RH
 993j@A ,,xR0r'   c                    |j                         }| j                  |   }||z  }|j                  |j                        }|S )N)flattenr   rM   shape)r"   rU   rP   qweight_flattenrQ   rO   s         r&   dequantize_tensorzNFQuantizer.dequantize_tensori   s?    !//+..?(.r'   c           	         t        |j                        dk7  r"t        dt        |j                         d      |j                  d   |j                  d   z  | j                  z  dk7  r9t        d|j                  d    d|j                  d    d| j                   d	      |j                  \  }}|j                  }|j                         }|j                  d
| j                        }| j                  dk(  r$|j                         j                  d
      d   }nC| j                  dk(  r)|j                  d
      d|j                  d
      z  z   }nt        d      |j                  d
      }||z  }|j                  d
      }| j                  j                  dd
      }	t        j                  ||	z
        }
t        j                   |
d
      }|j                  d
d| j"                  z        }t        j$                  ||z  dz  | j"                  z  dft        j&                  |      }t)        d| j"                  z        D ]:  }|d d |f   || j"                  z  z  |d d |f<   |d d dfxx   |d d |f   z  cc<   < |||j                  fS )Nr*   +Only support 2D matrix, but your input has  dimensions.r   r+   zWeight with shape (z x z!) is not dividable by block size .r)   r   rH   r   g      @zMethod not supported yet.   dtyper   )r:   rY   
ValueErrorr   r   rX   rM   r   rJ   r?   meanstdr!   rK   r   r,   rN   r   zerosuint8r9   )r"   rO   MNr   weight_flattenweight_block
weight_maxweight_divabsrS   rT   rU   qweight_packis                 r&   quantize_blockzNFQuantizer.quantize_blocks   sh   v||!J3v||K\J]]ijkk<<?V\\!_,t>!C%fll1o%6c&,,q/9J K226//1B!E 
 ||1  )%--b$//B;;("%))+//B/7:J[[I%%**r*2S<;K;KPR;K;S5SSJ%&ABB))"-
$z1%//3++33Ar:
99]Z78,,xR0 //"a4==&89{{AEQJ$>#B%++^de qDMM)* 	0A#AqDMQ->>GAqDMA'!Q$-/	0 Z55r'   c                B   |j                   }t        j                  |j                  d   d| j                  z  ft        j
                  |      }t        d| j                  z        D ]  }|j                  t        j                        d| j                  z  z  }|j                  t        j                        }| j                  |   j                         |d d |f<   || j                  z	  } |j                  d| j                        }||z  }|j                  |      }|S )Nr   r`   ra   r*   r)   )r   r,   rf   rY   r   float32r9   r   longr   squeezerM   r   )	r"   rU   rl   weight_shaper   rO   ro   lookup_table_idxrk   s	            r&   dequantize_blockzNFQuantizer.dequantize_block   s    gmmA.T]]0BC5==aghqDMM)* 	/A&zz%**54==8HH/225::>112BCKKMF1a4L.G		/ ~~b$//:
*-r'   )r*   cudar   @   )F   )g+ew?Fr*   )__name__
__module____qualname__r   staticmethodr    r   rV   r[   rp   rw   __classcell__)r%   s   @r&   r   r   #   sF    W 	 	  2  '6Rr'   r   c                   t        | j                               }|dk7  rt        d| d      t        j                  j                  | d      \  }}}|t        j                  t        j                  |      ddd|f         z  }t        j                  t        j                  |      d|ddf         |z  }||||||dS )	zn
    :param weight: The matrix to decompose, of shape (H, W) :param reduced_rank: the final rank :return:
    r*   r]   r^   F)full_matricesNr   )LRUSVhreduced_rank)r:   sizerc   r,   linalgsvdsqrtdiag)rO   r   matrix_dimensionr   r   r   r   r   s           r&   _low_rank_decompositionr      s     6;;=)1FGWFXXdeff ||e<HAq"	UZZ

1a<&789:A

5::a=<!234r9A"lSSr'   c                   t               rdd l}nt        d      |dvrt        d      |dk  rt        d      | j                         \  }}| j                  }| j
                  }t        j                  d| d| d| d	| d
| 
       t               r|dv rt        ||dd      }	|}
nt               rdnd}
| j                  |
t        j                        } | j                         }t        |      D ]  }t!                |dk(  rxt               rn|j"                  j%                  |j                  d      ddd      j                  |
      }|j&                  j)                  |j*                  |j,                        }n(	j/                  |      \  }}}|	j1                  |||      }| |z
  }t3        ||      }|d   |d   |d   }}}| t        j4                  ||      z
  } }}j                  ||      ||fS )Nr   z>bitsandbytes is not available, please install it to use LoftQ.)r*   rz   r`   z&Only support 2, 4, 8 bits quantizationz+Number of iterations must be greater than 0z	Weight: (z, z
) | Rank: z | Num Iter: z | Num Bits: )r*   r`   r   ry   )r   r   r   r   xpurx   r   rb   rz   cpuFnf4)requires_gradcompress_statistics
quant_typer   r   r   r   )r   bitsandbytesrc   r   r   rb   logginginfor   r   r   r   r,   rr   cloner9   r   nn
Params4bit
functionaldequantize_4bitdataquant_staterp   rw   r   mm)rO   r   r   num_iterbnbout_feature
in_featurer   rb   	quantizercompute_deviceresro   rU   dequantized_weightquantized_weightrP   rY   outputr   r   lora_Alora_Bs                          r&   
loftq_initr      s   "YZZy ABB1}FGG$kkmK]]FLLELL
K=:,jmT\S]]jksjtu !"h&&8&^`a	"2"4&YYnEMMYBF
,,.C8_ &q=24ff''uUZ_ ( b   "%!?!?gNaNa!b/8/G/G/L,gu!*!;!;<LgW\!])) )<H#C[&+vn7Ml1uxx1~%#&& FF  e <ffLLr'   c                p   dd l }|dk7  rt        d      t               st        d      t               rdnd}|j                  j                  | j                  | j                        }|j                  |t        j                        }||z
  }t                t        ||      }|d	   |d
   |d   }}
}	|
|	fS )Nr   rz   z0Only 4 bit quantization supported at the moment.z0bitsandbytes 4bit quantization is not available.r   rx   r   r   r   r   r   )r   rc   r   r   r   r   r   r   r   r,   rr   r   r   )rU   rO   r   r   r   r   r   residualr   r   r   s              r&   _loftq_init_newr      s    1}KLL "KLL.0UfN77gFYFYZYYnEMMYBF**H$XLIFfSk6.3I,qAa4Kr'   c                      e Zd ZdZd Zd Zy)_SafetensorLoaderz
    Simple utility class that loads tensors with safetensors from a single file or sharded files.

    Takes care of file name normalization etc.

    c                   |,	 t        |j                  j                  j                  d      }d}|j                  |      s t        j                  j                  ||      }|| _        t        |j                         dd       | _        d| _        d| _        d | _        t        j                  j'                  |      s|j)                  t        j                  j*                        d	   }	 t-        |t/        |d
            \  }}d| _        |D ci c]/  }|j)                  t        j                  j*                        d   |1 }	}|d   j5                         D 
ci c]  \  }}
||	|
    c}
}| _        y y # t        t
        f$ r}t        d      |d }~wt        $ r}t        d      |d }~ww xY w# t0        $ r}t3        d| d      |d }~ww xY wc c}w c c}
}w )NT)local_files_onlyzThe provided model does not appear to be a transformers model or is a local model. In this case, you must pass the model_path argument that points to the safetensors file.zNThe model.safetensors file must be present on disk, but it could not be found.zmodel.safetensorsbase_model_prefixbase_model.model.Fr   zmodel.safetensors.index.jsonzCould not find file for zA, ensure that there is a (sharded) safetensors file of the model.r)   
weight_map)r   
base_modelconfig_name_or_pathAttributeErrorr	   rc   r
   endswithospathjoin
model_pathgetattrget_base_modelr   prefix
is_shardedr   exists
rpartitionsepr   r   OSErrorFileNotFoundErroritems)r"   
peft_modelr   excsuffixpar_dirresolved_archive_filesharded_metadatakfile_maprB   s              r&   r   z_SafetensorLoader.__init__  s   
.z/D/D/K/K/Y/Ylpq
 %""6*j&9J$!()B)B)DFY[_!`)ww~~j) ++BGGKK8;G:T[2PQ;7%'7 #DOBWXQRWW[[1"5q8XHX:J<:X:^:^:`a$!Qq(1+~aDO *) #$56  a  +  d,  '.zl:{| YasG   +E5 *F0 4GG5F-FF-F((F-0	G9G		Gc                d   | j                   s| j                  }n| j                  |   }t        |dd      5 }	 |j	                  |      }d d d        |S # t
        $ rE}| j                  r-|t        | j                        dz   d  }|j	                  |      }n|Y d }~Sd }~ww xY w# 1 sw Y   S xY w)Nptr   )	frameworkr   r+   )r   r   r   r   
get_tensorr   r   r:   )r"   name	file_pathfrL   r   s         r&   r   z_SafetensorLoader.get_tensor6  s    I-IyD? 
	1	d+
	  # ))D$:$: ;a ? ABD\\$/FI 
	 s.   B%A	B";BB%B""B%%B/N)r{   r|   r}   __doc__r   r    r'   r&   r   r     s    'bRr'   r   c                   t               st        d      ddlm} d}d}t	        | |      }| j                         D ]  \  }}	t        |	|      s|j                  |      st        d      d}|t        |      d }|j                  |d	z         }
|	j                  |   }t        |	j                  |
d
|      \  }}|s=||	j                  |   j                  _        ||	j                   |   j                  _        |	j                  |   j                  j                  }|	j                   |   j                  j                  }||	j                  |   j                  _        ||	j                   |   j                  _         || |      }|s<||	j                  |   j                  _        ||	j                   |   j                  _        ~~ |st        d      y)a  
    Replace the LoRA weights of a model quantized with bitsandbytes, using the LoftQ technique.

    The replacement is done on the fly by loading in the non-quantized weights from a locally stored safetensors model
    file and initializing the LoRA weights such that the quantization error between the original and quantized weights
    is minimized.

    As lazy loading is not possible with pickle, normal PyTorch checkpoint files cannot be supported.

    Depending on the model size, calling this function may take some time to finish.

    Args:
        peft_model (`PeftModel`):
            The model to replace the weights of. Must be a quantized PEFT model with LoRA layers.
        model_path (`Optional[str]`):
            The path to the model safetensors file. If the model is a Hugging Face model, this will be inferred from
            the model's config. Otherwise, it must be provided.
        adapter_name (`str`):
            The name of the adapter to replace the weights of. The default adapter name is "default".
        callback (`Optional[Callable[[PeftModel, str], bool]]`):
            A callback function that will be called after each module is replaced. The callback function should take
            the model and the name of the current module as input and return a boolean indicating whether the
            replacement should be kept. If the callback returns False, the replacement will be rolled back. This can be
            very useful to confirm that the LoftQ initialization actually decreases the quantization error of the
            model. As an example, this callback could generate logits for given input and compare it with the logits
            from the original, non-quanitzed model with the same input, and only return `True` if there is an
            improvement. As this is a greedy optimization, it's possible that calling this function multiple times
            yields incremental improvements.
    zHbitsandbytes must be installed and the model must be quantized in 4bits.r   )
Linear4bitr   Fz8The passed model does not appear to be a valid PeftModelTNz.weightrz   )r   r   z%No bnb LoRA module found on the model)r   rc   peft.tuners.lorar   r   named_modules
isinstance
startswith	TypeErrorr:   r   rr   rO   r   r   r   )r   r   adapter_namecallbackr   r   	any_matchsafetensor_loaderr   modulerL   r   r   r   lora_A_beforelora_B_beforeshould_replaces                    r&   replace_lora_weights_loftqr   J  s   H !"cdd+ !FI)*jA #002 )f&*-v&VWW	CKM""--dY.>?xx-(Yef6<FMM,'..36<FMM,'..3l3::??l3::??28l#**/28l#**/!*d36CFMM,'..36CFMM,'..3=;)> @AA r'   )    )r+   )rO   z'Union[torch.Tensor, torch.nn.Parameter]r   intr   r   )r   r   r   r   )NdefaultN)r   zOptional[str]r   strr   z0Optional[Callable[[torch.nn.Module, str], bool]])"
__future__r   r   r   typingr   r   r   r,   accelerate.utils.memoryr   huggingface_hubr   huggingface_hub.errorsr	   r
   safetensorsr   r   transformers.utilsr   transformers.utils.hubr   peft.import_utilsr   r   r   r   r   no_gradr   r   r   r   r   r'   r&   <module>r      s   $ #  	 , ,  6 - M 2 * = W WG GTT" .M .Mb  (B BJ  !%!AE	NBNB NB ?	NB NBr'   