
import importlib.util
import os

import cv2
import numpy as np
import torch
from PIL import Image, ImageOps
from torchvision.transforms import InterpolationMode
from torchvision.transforms.functional import normalize, resize

from ...utils import get_logger, load_image


logger = get_logger(__name__)

_insightface_available = importlib.util.find_spec("insightface") is not None
_consisid_eva_clip_available = importlib.util.find_spec("consisid_eva_clip") is not None
_facexlib_available = importlib.util.find_spec("facexlib") is not None

if _insightface_available:
    import insightface
    from insightface.app import FaceAnalysis
else:
    raise ImportError("insightface is not available. Please install it using 'pip install insightface'.")

if _consisid_eva_clip_available:
    from consisid_eva_clip import create_model_and_transforms
    from consisid_eva_clip.constants import OPENAI_DATASET_MEAN, OPENAI_DATASET_STD
else:
    raise ImportError("consisid_eva_clip is not available. Please install it using 'pip install consisid_eva_clip'.")

if _facexlib_available:
    from facexlib.parsing import init_parsing_model
    from facexlib.utils.face_restoration_helper import FaceRestoreHelper
else:
    raise ImportError("facexlib is not available. Please install it using 'pip install facexlib'.")


def resize_numpy_image_long(image, resize_long_edge=768):
    """
    Resize the input image to a specified long edge while maintaining aspect ratio.

    Args:
        image (numpy.ndarray): Input image (H x W x C or H x W).
        resize_long_edge (int): The target size for the long edge of the image. Default is 768.

    Returns:
        numpy.ndarray: Resized image with the long edge matching `resize_long_edge`, while maintaining the aspect
        ratio.
    """
    h, w = image.shape[:2]
    if max(h, w) <= resize_long_edge:
        return image
    k = resize_long_edge / max(h, w)
    h = int(h * k)
    w = int(w * k)
    image = cv2.resize(image, (w, h), interpolation=cv2.INTER_LANCZOS4)
    return image
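
# Illustrative usage (not part of the original module); kept as a comment so nothing runs on
# import. A hypothetical 1536x1024 input is scaled so its long edge becomes 768:
#
#     >>> dummy = np.zeros((1536, 1024, 3), dtype=np.uint8)
#     >>> resize_numpy_image_long(dummy, resize_long_edge=768).shape
#     (768, 512, 3)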


def img2tensor(imgs, bgr2rgb=True, float32=True):
    """Numpy array to tensor.

    Args:
        imgs (list[ndarray] | ndarray): Input images.
        bgr2rgb (bool): Whether to change bgr to rgb.
        float32 (bool): Whether to change to float32.

    Returns:
        list[tensor] | tensor: Tensor images. If returned results only have
            one element, just return tensor.
    """

    def _totensor(img, bgr2rgb, float32):
        if img.shape[2] == 3 and bgr2rgb:
            if img.dtype == "float64":
                img = img.astype("float32")
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = torch.from_numpy(img.transpose(2, 0, 1))
        if float32:
            img = img.float()
        return img

    if isinstance(imgs, list):
        return [_totensor(img, bgr2rgb, float32) for img in imgs]
    return _totensor(imgs, bgr2rgb, float32)
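
# Illustrative usage (not part of the original module); `bgr` is a hypothetical OpenCV-style
# BGR array. The result is a CHW float tensor with channels flipped to RGB:
#
#     >>> bgr = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
#     >>> t = img2tensor(bgr.astype("float32") / 255.0)
#     >>> t.shape
#     torch.Size([3, 64, 64])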


def to_gray(img):
    """
    Converts an RGB image to grayscale by applying the standard luminosity formula.

    Args:
        img (torch.Tensor): The input image tensor with shape (batch_size, channels, height, width).
                            The image is expected to be in RGB format (3 channels).

    Returns:
        torch.Tensor: The grayscale image tensor with shape (batch_size, 3, height, width).
                      The grayscale values are replicated across all three channels.
    """
    x = 0.299 * img[:, 0:1] + 0.587 * img[:, 1:2] + 0.114 * img[:, 2:3]
    x = x.repeat(1, 3, 1, 1)
    return x


def process_face_embeddings(
    face_helper_1,
    clip_vision_model,
    face_helper_2,
    eva_transform_mean,
    eva_transform_std,
    app,
    device,
    weight_dtype,
    image,
    original_id_image=None,
    is_align_face=True,
):
    """
    Process face embeddings from an image, extracting relevant features such as face embeddings, landmarks, and parsed
    face features using a series of face detection and alignment tools.

    Args:
        face_helper_1: Face helper object (first helper) for alignment and landmark detection.
        clip_vision_model: Pre-trained CLIP vision model used for feature extraction.
        face_helper_2: Face helper object (second helper) for embedding extraction.
        eva_transform_mean: Mean values for image normalization before passing to EVA model.
        eva_transform_std: Standard deviation values for image normalization before passing to EVA model.
        app: Application instance used for face detection.
        device: Device (CPU or GPU) where the computations will be performed.
        weight_dtype: Data type of the weights for precision (e.g., `torch.float32`).
        image: Input image in RGB format with pixel values in the range [0, 255].
        original_id_image: (Optional) Original image for feature extraction if `is_align_face` is False.
        is_align_face: Boolean flag indicating whether face alignment should be performed.

    Returns:
        Tuple:
            - id_cond: Concatenated tensor of Ante face embedding and CLIP vision embedding
            - id_vit_hidden: Hidden state of the CLIP vision model, a list of tensors.
            - return_face_features_image_2: Processed face features image after normalization and parsing.
            - face_kps: Keypoints of the face detected in the image.
    """
    face_helper_1.clean_all()
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # get the antelopev2 face embedding from the largest detected face
    face_info = app.get(image_bgr)
    if len(face_info) > 0:
        # only use the face with the largest bounding-box area
        face_info = sorted(
            face_info, key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1])
        )[-1]
        id_ante_embedding = face_info["embedding"]  # (512,)
        face_kps = face_info["kps"]
    else:
        id_ante_embedding = None
        face_kps = None

    # use facexlib to detect and align the face
    face_helper_1.read_image(image_bgr)
    face_helper_1.get_face_landmarks_5(only_center_face=True)
    if face_kps is None:
        face_kps = face_helper_1.all_landmarks_5[0]
    face_helper_1.align_warp_face()
    if len(face_helper_1.cropped_faces) == 0:
        raise RuntimeError("facexlib align face fail")
    align_face = face_helper_1.cropped_faces[0]  # (512, 512, 3)

    # in case insightface did not detect a face, fall back to the aligned crop
    if id_ante_embedding is None:
        logger.warning("Failed to detect face using insightface. Extracting embedding with align face")
        id_ante_embedding = face_helper_2.get_feat(align_face)

    id_ante_embedding = torch.from_numpy(id_ante_embedding).to(device, weight_dtype)  # torch.Size([512])
    if id_ante_embedding.ndim == 1:
        id_ante_embedding = id_ante_embedding.unsqueeze(0)  # torch.Size([1, 512])

    # parse the face to mask out the background
    if is_align_face:
        input = img2tensor(align_face, bgr2rgb=True).unsqueeze(0) / 255.0
        input = input.to(device)
        parsing_out = face_helper_1.face_parse(normalize(input, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))[0]
        parsing_out = parsing_out.argmax(dim=1, keepdim=True)  # torch.Size([1, 1, 512, 512])
        bg_label = [0, 16, 18, 7, 8, 9, 14, 15]
        bg = sum(parsing_out == i for i in bg_label).bool()
        white_image = torch.ones_like(input)
        # only keep the face features
        return_face_features_image = torch.where(bg, white_image, to_gray(input))  # torch.Size([1, 3, 512, 512])
        return_face_features_image_2 = torch.where(bg, white_image, input)  # torch.Size([1, 3, 512, 512])
    else:
        original_image_bgr = cv2.cvtColor(original_id_image, cv2.COLOR_RGB2BGR)
        input = img2tensor(original_image_bgr, bgr2rgb=True).unsqueeze(0) / 255.0
        input = input.to(device)
        return_face_features_image = return_face_features_image_2 = input

    # transform the image before sending it to the EVA-CLIP vision tower
    face_features_image = resize(
        return_face_features_image, clip_vision_model.image_size, InterpolationMode.BICUBIC
    )  # torch.Size([1, 3, 336, 336])
    face_features_image = normalize(face_features_image, eva_transform_mean, eva_transform_std)
    id_cond_vit, id_vit_hidden = clip_vision_model(
        face_features_image.to(weight_dtype), return_all_features=False, return_hidden=True, shuffle=False
    )  # torch.Size([1, 768]), list of torch.Size([1, 577, 1024])
    id_cond_vit_norm = torch.norm(id_cond_vit, 2, 1, True)
    id_cond_vit = torch.div(id_cond_vit, id_cond_vit_norm)

    id_cond = torch.cat([id_ante_embedding, id_cond_vit], dim=-1)  # torch.Size([1, 1280])

    return id_cond, id_vit_hidden, return_face_features_image_2, face_kps


def process_face_embeddings_infer(
    face_helper_1,
    clip_vision_model,
    face_helper_2,
    eva_transform_mean,
    eva_transform_std,
    app,
    device,
    weight_dtype,
    img_file_path,
    is_align_face=True,
):
    """
    Process face embeddings from an input image for inference, including alignment, feature extraction, and embedding
    concatenation.

    Args:
        face_helper_1: Face helper object (first helper) for alignment and landmark detection.
        clip_vision_model: Pre-trained CLIP vision model used for feature extraction.
        face_helper_2: Face helper object (second helper) for embedding extraction.
        eva_transform_mean: Mean values for image normalization before passing to EVA model.
        eva_transform_std: Standard deviation values for image normalization before passing to EVA model.
        app: Application instance used for face detection.
        device: Device (CPU or GPU) where the computations will be performed.
        weight_dtype: Data type of the weights for precision (e.g., `torch.float32`).
        img_file_path: Path to the input image file (string) or a numpy array representing an image.
        is_align_face: Boolean flag indicating whether face alignment should be performed (default: True).

    Returns:
        Tuple:
            - id_cond: Concatenated tensor of Ante face embedding and CLIP vision embedding.
            - id_vit_hidden: Hidden state of the CLIP vision model, a list of tensors.
            - image: Processed face image after feature extraction and alignment.
            - face_kps: Keypoints of the face detected in the image.
    """
    # load and preprocess the input image
    if isinstance(img_file_path, str):
        image = np.array(load_image(image=img_file_path).convert("RGB"))
    else:
        image = np.array(ImageOps.exif_transpose(Image.fromarray(img_file_path)).convert("RGB"))

    # resize the image so that its long edge is at most 1024 pixels
    image = resize_numpy_image_long(image, 1024)
    original_id_image = image

    # extract face embeddings and related features
    id_cond, id_vit_hidden, align_crop_face_image, face_kps = process_face_embeddings(
        face_helper_1,
        clip_vision_model,
        face_helper_2,
        eva_transform_mean,
        eva_transform_std,
        app,
        device,
        weight_dtype,
        image,
        original_id_image,
        is_align_face,
    )

    # convert the aligned cropped face tensor back to a PIL image
    tensor = align_crop_face_image.cpu().detach()
    tensor = tensor.squeeze()
    tensor = tensor.permute(1, 2, 0)
    tensor = tensor.numpy() * 255
    tensor = tensor.astype(np.uint8)
    image = ImageOps.exif_transpose(Image.fromarray(tensor))

    return id_cond, id_vit_hidden, image, face_kps


def prepare_face_models(model_path, device, dtype):
    """
    Prepare all face models for the facial recognition task.

    Parameters:
    - model_path: Path to the directory containing model files.
    - device: The device (e.g., 'cuda', 'xpu', 'cpu') where models will be loaded.
    - dtype: Data type (e.g., torch.float32) for model inference.

    Returns:
    - face_helper_1: First face restoration helper.
    - face_helper_2: Second face restoration helper.
    - face_clip_model: CLIP model for face extraction.
    - eva_transform_mean: Mean value for image normalization.
    - eva_transform_std: Standard deviation value for image normalization.
    - face_main_model: Main face analysis model.
    """
    # face detection / restoration helper and face parser
    face_helper_1 = FaceRestoreHelper(
        upscale_factor=1,
        face_size=512,
        crop_ratio=(1, 1),
        det_model="retinaface_resnet50",
        save_ext="png",
        device=device,
        model_rootpath=os.path.join(model_path, "face_encoder"),
    )
    face_helper_1.face_parse = init_parsing_model(
        model_name="bisenet", device=device, model_rootpath=os.path.join(model_path, "face_encoder")
    )

    # ArcFace embedding model, used as a fallback when insightface detection fails
    face_helper_2 = insightface.model_zoo.get_model(
        f"{model_path}/face_encoder/models/antelopev2/glintr100.onnx", providers=["CUDAExecutionProvider"]
    )
    face_helper_2.prepare(ctx_id=0)

    # local facial extractor part 1: EVA-CLIP vision tower
    model, _, _ = create_model_and_transforms(
        "EVA02-CLIP-L-14-336",
        os.path.join(model_path, "face_encoder", "EVA02_CLIP_L_336_psz14_s6B.pt"),
        force_custom_clip=True,
    )
    face_clip_model = model.visual
    eva_transform_mean = getattr(face_clip_model, "image_mean", OPENAI_DATASET_MEAN)
    eva_transform_std = getattr(face_clip_model, "image_std", OPENAI_DATASET_STD)
    if not isinstance(eva_transform_mean, (list, tuple)):
        eva_transform_mean = (eva_transform_mean,) * 3
    if not isinstance(eva_transform_std, (list, tuple)):
        eva_transform_std = (eva_transform_std,) * 3

    # local facial extractor part 2: antelopev2 face analysis
    face_main_model = FaceAnalysis(
        name="antelopev2", root=os.path.join(model_path, "face_encoder"), providers=["CUDAExecutionProvider"]
    )
    face_main_model.prepare(ctx_id=0, det_size=(640, 640))

    # move the face models to the target device
    face_helper_1.face_det.eval()
    face_helper_1.face_parse.eval()
    model.eval()
    face_helper_1.face_det.to(device)
    face_helper_1.face_parse.to(device)
    model.to(device, dtype=dtype)

    return face_helper_1, face_helper_2, face_clip_model, eva_transform_mean, eva_transform_std, face_main_model
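
# End-to-end usage sketch (not part of the original module); kept as a comment so nothing runs
# on import. `model_path` is assumed to hold the ConsisID face-encoder weights in the layout
# expected above, and both paths below are hypothetical:
#
#     >>> import torch
#     >>> (
#     ...     face_helper_1, face_helper_2, face_clip_model,
#     ...     eva_transform_mean, eva_transform_std, face_main_model,
#     ... ) = prepare_face_models("path/to/model_dir", device="cuda", dtype=torch.bfloat16)
#     >>> id_cond, id_vit_hidden, face_image, face_kps = process_face_embeddings_infer(
#     ...     face_helper_1, face_clip_model, face_helper_2, eva_transform_mean,
#     ...     eva_transform_std, face_main_model, "cuda", torch.bfloat16,
#     ...     "path/to/face.jpg", is_align_face=True,
#     ... )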