
    bi                     p   d dl mZ d dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlZd dlZd dlmZ d dlmZ d dlmZmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZmZm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)  e       rd dl*m+c m,Z- dZ.ndZ. e j^                  e0      Z1dZ2e G d de             Z3 G d de'      Z4y)    )	dataclass)partial)AnyDictListOptionalTupleUnionN)Image)tqdm)CLIPTextModelCLIPTokenizer   )PipelineImageInput)AutoencoderKLUNet2DConditionModel)DDIMSchedulerLCMScheduler)
BaseOutputis_torch_xla_availableloggingreplace_example_docstring)is_scipy_available)randn_tensor   )DiffusionPipeline   )MarigoldImageProcessorTFaE  
Examples:
```py
>>> import diffusers
>>> import torch

>>> pipe = diffusers.MarigoldDepthPipeline.from_pretrained(
...     "prs-eth/marigold-depth-v1-1", variant="fp16", torch_dtype=torch.float16
... ).to("cuda")

>>> image = diffusers.utils.load_image("https://marigoldmonodepth.github.io/images/einstein.jpg")
>>> depth = pipe(image)

>>> vis = pipe.image_processor.visualize_depth(depth.prediction)
>>> vis[0].save("einstein_depth.png")

>>> depth_16bit = pipe.image_processor.export_depth_to_16bit_png(depth.prediction)
>>> depth_16bit[0].save("einstein_depth_16bit.png")
```
c                       e Zd ZU dZeej                  ej                  f   e	d<   edej                  ej                  f   e	d<   edej                  f   e	d<   y)MarigoldDepthOutputa  
    Output class for Marigold monocular depth prediction pipeline.

    Args:
        prediction (`np.ndarray`, `torch.Tensor`):
            Predicted depth maps with values in the range [0, 1]. The shape is $numimages 	imes 1 	imes height 	imes
            width$ for `torch.Tensor` or $numimages 	imes height 	imes width 	imes 1$ for `np.ndarray`.
        uncertainty (`None`, `np.ndarray`, `torch.Tensor`):
            Uncertainty maps computed from the ensemble, with values in the range [0, 1]. The shape is $numimages
            	imes 1 	imes height 	imes width$ for `torch.Tensor` or $numimages 	imes height 	imes width 	imes 1$
            for `np.ndarray`.
        latent (`None`, `torch.Tensor`):
            Latent features corresponding to the predictions, compatible with the `latents` argument of the pipeline.
            The shape is $numimages * numensemble 	imes 4 	imes latentheight 	imes latentwidth$.
    
predictionNuncertaintylatent)
__name__
__module____qualname____doc__r
   npndarraytorchTensor__annotations__     o/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/pipelines/marigold/pipeline_marigold_depth.pyr    r    R   sO      bjj%,,.//tRZZ566$$%%r.   r    c            !           e Zd ZdZdZdZ	 	 	 	 	 d,dededee	e
f   deded	ee   d
ee   dee   dee   dee   f fdZdedededededededeeeef      deej,                     deeej.                  eej.                     f      dededefdZej4                  j6                  d-d       Z ej:                          ee      	 	 	 	 	 	 	 	 	 	 	 	 	 	 d.dedee   dedee   dededededeeeef      deeej,                  eej,                     f      deeej.                  eej.                     f      dededed efd!              Z dej,                  deej,                     deej.                     dedede!ej,                  ej,                  f   fd"Z"d#ej,                  dej,                  fd$Z#e$	 	 	 	 	 	 	 	 d/d%ej,                  d
ededed&ed'e%d(ed)e%d*ede!ej,                  eej,                     f   fd+       Z& xZ'S )0MarigoldDepthPipelinea7  
    Pipeline for monocular depth estimation using the Marigold method: https://marigoldmonodepth.github.io.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

    Args:
        unet (`UNet2DConditionModel`):
            Conditional U-Net to denoise the depth latent, conditioned on image latent.
        vae (`AutoencoderKL`):
            Variational Auto-Encoder (VAE) Model to encode and decode images and predictions to and from latent
            representations.
        scheduler (`DDIMScheduler` or `LCMScheduler`):
            A scheduler to be used in combination with `unet` to denoise the encoded image latents.
        text_encoder (`CLIPTextModel`):
            Text-encoder, for empty text embedding.
        tokenizer (`CLIPTokenizer`):
            CLIP tokenizer.
        prediction_type (`str`, *optional*):
            Type of predictions made by the model.
        scale_invariant (`bool`, *optional*):
            A model property specifying whether the predicted depth maps are scale-invariant. This value must be set in
            the model config. When used together with the `shift_invariant=True` flag, the model is also called
            "affine-invariant". NB: overriding this value is not supported.
        shift_invariant (`bool`, *optional*):
            A model property specifying whether the predicted depth maps are shift-invariant. This value must be set in
            the model config. When used together with the `scale_invariant=True` flag, the model is also called
            "affine-invariant". NB: overriding this value is not supported.
        default_denoising_steps (`int`, *optional*):
            The minimum number of denoising diffusion steps that are required to produce a prediction of reasonable
            quality with the given model. This value must be set in the model config. When the pipeline is called
            without explicitly setting `num_inference_steps`, the default value is used. This is required to ensure
            reasonable results with various model flavors compatible with the pipeline, such as those relying on very
            short denoising schedules (`LCMScheduler`) and those with full diffusion schedules (`DDIMScheduler`).
        default_processing_resolution (`int`, *optional*):
            The recommended value of the `processing_resolution` parameter of the pipeline. This value must be set in
            the model config. When the pipeline is called without explicitly setting `processing_resolution`, the
            default value is used. This is required to ensure reasonable results with various model flavors trained
            with varying optimal processing resolution values.
    ztext_encoder->unet->vae)depth	disparityunetvae	schedulertext_encoder	tokenizerprediction_typescale_invariantshift_invariantdefault_denoising_stepsdefault_processing_resolutionc                    t         |           || j                  vr&t        j	                  d| d| j                   d       | j                  |||||       | j                  ||||	|
       t        | dd       r/dt        | j                  j                  j                        dz
  z  nd	| _        || _        || _        |	| _        |
| _        d | _        t%        | j                  
      | _        y )Nz*Potentially unsupported `prediction_type='z&'`; values supported by the pipeline: .)r4   r5   r6   r7   r8   )r9   r:   r;   r<   r=   r5   r   r      )vae_scale_factor)super__init__supported_prediction_typesloggerwarningregister_modulesregister_to_configgetattrlenr5   configblock_out_channelsrA   r:   r;   r<   r=   empty_text_embeddingr   image_processor)selfr4   r5   r6   r7   r8   r9   r:   r;   r<   r=   	__class__s              r/   rC   zMarigoldDepthPipeline.__init__   s    	$"A"AANN<_<MMs22316
 	% 	 	
 	+++$;*G 	  	
 W^^bdikoVpc$((//*L*L&MPQ&Q Rvw..'>$-J*$(!5tG\G\]r.   imagenum_inference_stepsensemble_sizeprocessing_resolutionresample_method_inputresample_method_output
batch_sizeensembling_kwargslatents	generatoroutput_typeoutput_uncertaintyreturnc           
       	  
 dt        | j                  j                  j                        dz
  z  }|| j                  k7  rt        d| j                   d| d      |t        d      |dk  rt        d      |dk  rt        d      |dk(  rt        j                  d	       |dkD  r-| j                  s| j                  rt               st        d
      |dk(  r|rt        d      |t        d      |dk  rt        d      || j                  z  dk7  rt        d| j                   d      |dvrt        d      |dvrt        d      |dk  rt        d      |dvrt        d      |	
t        d      |1t        |t              st        d      d|v r|d   dvrt        d      d}d\  }}t        |t              s|g}t        |      D ]  \  }}t        |t         j"                        st%        j&                  |      r]|j(                  dvrt        d| d|j*                   d      |j*                  d d  \  }}d}|j(                  d!k(  rW|j*                  d   }nGt        |t,        j,                        r|j.                  \  }}d}nt        d"| d#t1        |       d      |||}}n!||f||fk7  rt        d$| d%||f d&||f       ||z  }	 |	t%        j&                  |	      st        d'      |	j3                         d!k7  rt        d(|	j*                   d      |dkD  r<t5        ||      }||z  |z  }||z  |z  }|dk(  s|dk(  rt        d)| d*| d+      ||}}|| j                  z   dz
  | j                  z  }|| j                  z   dz
  | j                  z  }||z  | j                  j                  j6                  ||f}|	j*                  |k7  rt        d,|	j*                   d-| d      
t        
t              r=t        
      ||z  k7  rt        d.      t9        
fd/
D              st        d0      |S t        
t$        j:                        st        d1t1        
       d      |S )2Nr   r   z/`vae_scale_factor` computed at initialization (z) differs from the actual one (z).zW`num_inference_steps` is not specified and could not be resolved from the model config.z'`num_inference_steps` must be positive.z!`ensemble_size` must be positive.zk`ensemble_size` == 2 results are similar to no ensembling (1); consider increasing the value to at least 3.z9Make sure to install scipy if you want to use ensembling.zpComputing uncertainty by setting `output_uncertainty=True` also requires setting `ensemble_size` greater than 1.zY`processing_resolution` is not specified and could not be resolved from the model config.r   zx`processing_resolution` must be non-negative: 0 for native resolution, or any positive value for downsampled processing.z.`processing_resolution` must be a multiple of r?   )nearestnearest-exactbilinearbicubicareazy`resample_method_input` takes string values compatible with PIL library: nearest, nearest-exact, bilinear, bicubic, area.zz`resample_method_output` takes string values compatible with PIL library: nearest, nearest-exact, bilinear, bicubic, area.z`batch_size` must be positive.)ptr(   z*`output_type` must be one of `pt` or `np`.z2`latents` and `generator` cannot be used together.z)`ensembling_kwargs` must be a dictionary.	reductionmeanmedianzF`ensembling_kwargs['reduction']` can be either `'mean'` or `'median'`.)NN)r   r      z`image[z(]` has unsupported dimensions or shape: ri   zUnsupported `image[z	]` type: zInput `image[z]` has incompatible dimensions z with the previous images z!`latents` must be a torch.Tensor.z/`latents` has unsupported dimensions or shape: z*Extreme aspect ratio of the input image: [z x ]z`latents` has unexpected shape=z
 expected=z^The number of generators must match the total number of ensemble members for all input images.c              3   ~   K   | ]4  }|j                   j                  d    j                   j                  k(   6 yw)r   N)devicetype).0grZ   s     r/   	<genexpr>z5MarigoldDepthPipeline.check_inputs.<locals>.<genexpr>B  s.     X188==IaL,?,?,D,DDXs   :=z;`generator` device placement is not consistent in the list.zUnsupported generator type: )rJ   r5   rK   rL   rA   
ValueErrorrE   rF   r:   r;   r   ImportError
isinstancedictlist	enumerater(   r)   r*   	is_tensorndimshaper   sizern   dimmaxlatent_channelsall	Generator)rO   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   actual_vae_scale_factor
num_imagesWHiimgH_iW_iN_imax_orignew_Hnew_Wwhshape_expecteds             `                 r/   check_inputsz"MarigoldDepthPipeline.check_inputs   sJ    #$DHHOO,N,N(ORS(S"T"d&;&;;A$BWBWAXXw  yP  xQ  QS  T  &vww"FGG1@AAANN? 1$"6"6$:N:NXjXlYZZA"4"  !(k  !1$*  !4#8#88A=MdNcNcMddefgg (ccC  ")ddC  >=>>l*IJJ9#8QRR(/6 !LMM//4Ek4RZl4l !ijj 
1%&GE& 	FAs#rzz*eooc.B889,$wqc1YZ]ZcZcYdde%fgg99RS>S88q=))A,CC-88S #6qc49+Q!OPPyC1QC:% #A3&EsCj\Qkmnpqlrkst  #J'	, ??7+ !DEE{{}! #RSZS`S`Raab!cdd$q(q!911X=11X=A:!$'QRSQTTWXYWZZ[%\]]e1T***Q.43H3HHAT***Q.43H3HHA(=8$((//:Y:Y[\^_`N}}. #B7==/Q[\j[kkl!mnn  )T*y>Z-%??$x  XiXX$%bcc   	5??; #?Y?PPQ!RSSr.   c                 p   t        | d      si | _        n<t        | j                  t              s"t	        dt        | j                         d      t        di | j                  }|j                  d|      |d<   |j                  d|      |d<   |t        |fi |S |t        dd|i|S t	        d      )	N_progress_bar_configz=`self._progress_bar_config` should be of type `dict`, but is r?   descleavetotalz/Either `total` or `iterable` has to be defined.r-   )hasattrr   rt   ru   rr   rn   getr   )rO   iterabler   r   r   progress_bar_configs         r/   progress_barz"MarigoldDepthPipeline.progress_barI  s    t34(*D%D55t<OPTUYUnUnPoOppqr  #?T%>%>?&9&=&=fd&KF#':'>'>w'NG$8$788;e;':;;NOOr.   match_input_resolutionoutput_latentreturn_dictc                 h   | j                   }| j                  }|| j                  }|| j                  }| j	                  ||||||||	|
|||      }| j
                  `d}| j                  |d| j                  j                  dd      }|j                  j                  |      }| j                  |      d   | _        | j                  j                  |||||      \  }}}| j                  ||
|||      \  }}~| j
                  j                  ||      j                  |d	d	      }g }| j                  t!        d||z  |      dd
      D ]  }||||z    }||||z    }|j"                  d   }|d| } | j$                  j'                  ||       | j                  | j$                  j(                  dd      D ]w  }!t+        j,                  ||gd	      }"| j/                  |"|!| d      d   }#| j$                  j1                  |#|!||      j2                  }t4        sdt7        j8                          y |j;                  |        t+        j,                  |d      }~~~~~~ ~"~#t+        j,                  t!        d|j"                  d   |      D cg c]  }| j=                  ||||z           c}d      }$|sd}| j                  j?                  |$|      }$d}%|d	kD  r |$j@                  ||g|$j"                  d	d  }$t!        |      D cg c]3  } | jB                  |$|   | jD                  | jF                  |fi |	xs i 5 }$}tI        |$ \  }$}%t+        j,                  |$d      }$|rt+        j,                  |%d      }%nd}%|rB| j                  jK                  |$||d      }$|%!|r| j                  jK                  |%||d      }%|dk(  r:| j                  jM                  |$      }$|%|r| j                  jM                  |%      }%| jO                          |s|$|%|fS tQ        |$|%|      S c c}w c c}w )aA  
        Function invoked when calling the pipeline.

        Args:
            image (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`),
                `List[torch.Tensor]`: An input image or images used as an input for the depth estimation task. For
                arrays and tensors, the expected value range is between `[0, 1]`. Passing a batch of images is possible
                by providing a four-dimensional array or a tensor. Additionally, a list of images of two- or
                three-dimensional arrays or tensors can be passed. In the latter case, all list elements must have the
                same width and height.
            num_inference_steps (`int`, *optional*, defaults to `None`):
                Number of denoising diffusion steps during inference. The default value `None` results in automatic
                selection.
            ensemble_size (`int`, defaults to `1`):
                Number of ensemble predictions. Higher values result in measurable improvements and visual degradation.
            processing_resolution (`int`, *optional*, defaults to `None`):
                Effective processing resolution. When set to `0`, matches the larger input image dimension. This
                produces crisper predictions, but may also lead to the overall loss of global context. The default
                value `None` resolves to the optimal value from the model config.
            match_input_resolution (`bool`, *optional*, defaults to `True`):
                When enabled, the output prediction is resized to match the input dimensions. When disabled, the longer
                side of the output will equal to `processing_resolution`.
            resample_method_input (`str`, *optional*, defaults to `"bilinear"`):
                Resampling method used to resize input images to `processing_resolution`. The accepted values are:
                `"nearest"`, `"nearest-exact"`, `"bilinear"`, `"bicubic"`, or `"area"`.
            resample_method_output (`str`, *optional*, defaults to `"bilinear"`):
                Resampling method used to resize output predictions to match the input resolution. The accepted values
                are `"nearest"`, `"nearest-exact"`, `"bilinear"`, `"bicubic"`, or `"area"`.
            batch_size (`int`, *optional*, defaults to `1`):
                Batch size; only matters when setting `ensemble_size` or passing a tensor of images.
            ensembling_kwargs (`dict`, *optional*, defaults to `None`)
                Extra dictionary with arguments for precise ensembling control. The following options are available:
                - reduction (`str`, *optional*, defaults to `"median"`): Defines the ensembling function applied in
                  every pixel location, can be either `"median"` or `"mean"`.
                - regularizer_strength (`float`, *optional*, defaults to `0.02`): Strength of the regularizer that
                  pulls the aligned predictions to the unit range from 0 to 1.
                - max_iter (`int`, *optional*, defaults to `2`): Maximum number of the alignment solver steps. Refer to
                  `scipy.optimize.minimize` function, `options` argument.
                - tol (`float`, *optional*, defaults to `1e-3`): Alignment solver tolerance. The solver stops when the
                  tolerance is reached.
                - max_res (`int`, *optional*, defaults to `None`): Resolution at which the alignment is performed;
                  `None` matches the `processing_resolution`.
            latents (`torch.Tensor`, or `List[torch.Tensor]`, *optional*, defaults to `None`):
                Latent noise tensors to replace the random initialization. These can be taken from the previous
                function call's output.
            generator (`torch.Generator`, or `List[torch.Generator]`, *optional*, defaults to `None`):
                Random number generator object to ensure reproducibility.
            output_type (`str`, *optional*, defaults to `"np"`):
                Preferred format of the output's `prediction` and the optional `uncertainty` fields. The accepted
                values are: `"np"` (numpy array) or `"pt"` (torch tensor).
            output_uncertainty (`bool`, *optional*, defaults to `False`):
                When enabled, the output's `uncertainty` field contains the predictive uncertainty map, provided that
                the `ensemble_size` argument is set to a value above 2.
            output_latent (`bool`, *optional*, defaults to `False`):
                When enabled, the output's `latent` field contains the latent codes corresponding to the predictions
                within the ensemble. These codes can be saved, modified, and used for subsequent calls with the
                `latents` argument.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.marigold.MarigoldDepthOutput`] instead of a plain tuple.

        Examples:

        Returns:
            [`~pipelines.marigold.MarigoldDepthOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.marigold.MarigoldDepthOutput`] is returned, otherwise a
                `tuple` is returned where the first element is the prediction, the second element is the uncertainty
                (or `None`), and the third is the latent (or `None`).
        N 
do_not_padTrd   )padding
max_length
truncationreturn_tensorsr   )rm   dtyper   zMarigold predictions...)r   r   )rm   FzDiffusion steps...r|   )encoder_hidden_statesr   )rZ   )is_aar(   )r!   r"   r#   ))_execution_devicer   r<   r=   r   rM   r8   model_max_length	input_idstor7   rN   
preprocessprepare_latentsrepeatr   rangerz   r6   set_timesteps	timestepsr*   catr4   stepprev_sampleXLA_AVAILABLExm	mark_stepappenddecode_predictionunpad_imagereshapeensemble_depthr:   r;   zipresize_antialiaspt_to_numpymaybe_free_model_hooksr    )&rO   rQ   rR   rS   rT   r   rU   rV   rW   rX   rY   rZ   r[   r\   r   r   rm   r   r   prompttext_inputstext_input_idsr   original_resolutionimage_latentpred_latentbatch_empty_text_embeddingpred_latentsr   batch_image_latentbatch_pred_latenteffective_batch_sizetexttbatch_latentnoiser!   r"   s&                                         r/   __call__zMarigoldDepthPipeline.__call__\  s   t ''

 &"&">"> ($($F$F! &&!!"

" $$,F..$>>::# ) K )2255f=N(,(9(9.(I!(LD% /3.B.B.M.M(*?/
+w+ %)$8$87I}j%
!k %)%>%>%A%AW\%A%]%d%d1&
" ""!Z-/<DOh # 
 	3A ".a!j.!A +AJ ?#5#;#;A#> -.C/CDDNN(()<V(L&&t~~'?'?uSg&h #$yy*<>O)PVWX		,[`	abcd$(NN$7$71/9 %8 %+ " !LLN#  12'	3* ii!4 & YY q+"3"3A"6
C &&{1q:~'FG 

 K ))55j'J
 1+++J]
HXHXYZY[H\]J z*	  $##qM((((&	
 ).B	J 	 '*:&6#J:15J!#ii;" "-->>/1Gu ? J &+="22CC!46LTY D 
 $--99*EJ&+="22>>{K 	##%[99"!#
 	
*	s   P*
8P/c                    d }t        j                  t        d|j                  d   |      D cg c])  } || j                  j                  ||||z                + c}d      }|| j                  j                  j                  z  }|j                  |d      }|}	|	-t        |j                  ||j                  |j                        }	||	fS c c}w )Nc                     t        | d      r| j                  j                         S t        | d      r| j                  S t	        d      )Nlatent_distrY   z3Could not access latents of provided encoder_output)r   r   moderY   AttributeError)encoder_outputs    r/   retrieve_latentsz?MarigoldDepthPipeline.prepare_latents.<locals>.retrieve_latentsv  sC    ~}5%1166883%---$%Z[[r.   r   r   )rZ   rm   r   )r*   r   r   rz   r5   encoderK   scaling_factorrepeat_interleaver   rm   r   )
rO   rQ   rY   rZ   rS   rW   r   r   r   r   s
             r/   r   z%MarigoldDepthPipeline.prepare_latentsn  s    	\ yy q%++a.*= !q1z>1J!KL 
 $dhhoo&D&DD#55m5K&""##**"((	K [((%s   .Cr   c                    |j                         dk7  s0|j                  d   | j                  j                  j                  k7  r:t        d| j                  j                  j                   d|j                   d      | j                  j                  || j                  j                  j                  z  d      d   }|j                  dd	
      }t        j                  |dd      }|dz   dz  }|S )Nri   r   z Expecting 4D tensor of shape [B,z,H,W]; got r?   F)r   r   Tr|   keepdimg            ?g       @)r|   rz   r5   rK   r~   rr   decoder   rg   r*   clip)rO   r   r!   s      r/   r   z'MarigoldDepthPipeline.decode_prediction  s    ??![%6%6q%9TXX__=\=\%\2488??3R3R2SS^_j_p_p^qqrs  XX__[488??3Q3Q%Q_d_efgh
__D_9
ZZ
D#6
 3&#-
r.   r2   re   regularizer_strengthmax_itertolmax_resc	           
         | j                         dk7  s| j                  d   dk7  rt        d| j                   d      dvrt        d d      srt        d      dt        j                  ffd	dt        j                  d
t
        j                  dt        j                  ffd	 ddt        j                  dt        dt        t        j                  t        t        j                     f   ffdd
t
        j                  dt        j                  dt        ffddt        j                  ffd}	xs }
| j                  d   |
r |	|       } | |      }  | |      \  } }| j                         }rr| j                         }nrd}nt        d      ||z
  j                  d      }| |z
  |z  } |r||z  }| |fS )a	  
        Ensembles the depth maps represented by the `depth` tensor with expected shape `(B, 1, H, W)`, where B is the
        number of ensemble members for a given prediction of size `(H x W)`. Even though the function is designed for
        depth maps, it can also be used with disparity maps as long as the input tensor values are non-negative. The
        alignment happens when the predictions have one or more degrees of freedom, that is when they are either
        affine-invariant (`scale_invariant=True` and `shift_invariant=True`), or just scale-invariant (only
        `scale_invariant=True`). For absolute predictions (`scale_invariant=False` and `shift_invariant=False`)
        alignment is skipped and only ensembling is performed.

        Args:
            depth (`torch.Tensor`):
                Input ensemble depth maps.
            scale_invariant (`bool`, *optional*, defaults to `True`):
                Whether to treat predictions as scale-invariant.
            shift_invariant (`bool`, *optional*, defaults to `True`):
                Whether to treat predictions as shift-invariant.
            output_uncertainty (`bool`, *optional*, defaults to `False`):
                Whether to output uncertainty map.
            reduction (`str`, *optional*, defaults to `"median"`):
                Reduction method used to ensemble aligned predictions. The accepted values are: `"mean"` and
                `"median"`.
            regularizer_strength (`float`, *optional*, defaults to `0.02`):
                Strength of the regularizer that pulls the aligned predictions to the unit range from 0 to 1.
            max_iter (`int`, *optional*, defaults to `2`):
                Maximum number of the alignment solver steps. Refer to `scipy.optimize.minimize` function, `options`
                argument.
            tol (`float`, *optional*, defaults to `1e-3`):
                Alignment solver tolerance. The solver stops when the tolerance is reached.
            max_res (`int`, *optional*, defaults to `1024`):
                Resolution at which the alignment is performed; `None` matches the `processing_resolution`.
        Returns:
            A tensor of aligned and ensembled depth maps and optionally a tensor of uncertainties of the same shape:
            `(1, 1, H, W)`.
        ri   r   z,Expecting 4D tensor of shape [B,1,H,W]; got r?   rf   Unrecognized reduction method: z1Pure shift-invariant ensembling is not supported.r2   c                 $   | j                  d      j                  d      j                  }| j                  d      j                  d      j                  }rTrRd||z
  j	                  d      z  }| |z  }t        j                  ||f      j                         j                         }nAr4d|j	                  d      z  }|j                         j                         }nt        d      |j                  t        j                        }|S )Nr   r   r   ư>minUnrecognized alignment.)r   r   valuesr}   clampr*   r   cpunumpyrr   astyper(   float64)	r2   init_mininit_maxinit_sinit_tparamrS   r:   r;   s	         r/   
init_paramz8MarigoldDepthPipeline.ensemble_depth.<locals>.init_param  s    }}]B7;;;BIIH}}]B7;;;BIIH?8 3::t:DD 8+		66"23779??A x~~$~77

**, !:;;LL,ELr.   r   r]   c                    rrt        j                  |d      \  }}t        j                  |      j	                  |       j                  ddd      }t        j                  |      j	                  |       j                  ddd      }| |z  |z   }|S r=t        j                  |      j	                  |       j                  ddd      }| |z  }|S t        d      )Nr   r   r   )r(   splitr*   
from_numpyr   viewrr   )r2   r   sr   outrS   r:   r;   s        r/   alignz3MarigoldDepthPipeline.ensemble_depth.<locals>.align  s    ?xxq)1$$Q'**5166}aAN$$Q'**5166}aANai!m J !$$U+..u5::=!QPQRai J !!:;;r.   depth_alignedreturn_uncertaintyc                 f   d }dk(  r6t        j                  | dd      }|rt        j                  | dd      }||fS dk(  r`t        j                  | dd      j                  }|r8t        j                  t        j
                  | |z
        dd      j                  }||fS t        d d      )Nrg   r   Tr   rh   r   r?   )r*   rg   stdrh   r   absrr   )r  r  r"   r!   re   s       r/   ensemblez6MarigoldDepthPipeline.ensemble_depth.<locals>.ensemble  s     KF""ZZ1dK
%"'))Mq$"OK {** h&"\\-QMTT
%"',,uyy9S/TZ[ei"j"q"qK {** !#B9+Q!OPPr.   c                    d} ||       }t        j                  t        j                              D ]B  \  }}||   ||   z
  }||dz  j                         j	                         j                         z  }D dkD  rs |d      \  }}|j                         j                         j                         }	d|j                         z
  j                         j                         }
||	|
z   z  z  }|S )Ng        r   r   Fr  r   )	r*   combinationsarangerg   sqrtitemr   r  r}   )r   r2   costr  r   jdiffr!   _err_nearerr_farr   r  rS   r   s              r/   cost_fnz5MarigoldDepthPipeline.ensemble_depth.<locals>.cost_fn  s    D!%/M**5<<+FG 71$Q'-*::q(--/44667 $a' (5 Q
A%>>+//1668!11668==?G+/CCCKr.   c           	      6   dd l }| j                  t        j                        }2t	        |j
                  dd        kD  rt        j                  |d      } |      }|j                  j                  t        |      |d	dd      }|j                  S )	Nr   r   r`   )r2   BFGSF)maxiterdisp)methodr   options)scipyr   r*   float32r}   rz   r   resize_to_max_edgeoptimizeminimizer   x)
r2   r  depth_to_alignr   resr  r   r   r   r   s
        r/   compute_paramz;MarigoldDepthPipeline.ensemble_depth.<locals>.compute_param  s    "XXemm4N"s>+?+?+C'Dw'N!7!J!J>[bds!t~.E..))~6$,e< * C 55Lr.   r   r  r   r   r   )F)r|   rz   rr   r*   r+   r(   r)   boolr	   r   floatr}   r   r   )r2   r:   r;   r\   re   r   r   r   r   r"  requires_aligningr   r"   	depth_max	depth_mindepth_ranger   r  r  rS   r   s    `` `````       @@@@@r/   r   z$MarigoldDepthPipeline.ensemble_depth  s   \ 99;!u{{1~2KEKK=XYZ[[..>ykKLL?PQQ	ell 	"	 	bjj 	U\\ 	 EJ	+ <<	+=A	+5<<%,,!778	+ 	2:: 	ell 	u 	 	 	 	 	& ,>A!%(E%'E%e@RS{IIK			II677 9,333="k1;&Kk!!r.   )NTTNN)NNNT)Nr   NTra   ra   r   NNNr(   FFT)TTFrh   g{Gz?r   gMbP?i   )(r$   r%   r&   r'   model_cpu_offload_seqrD   r   r   r
   r   r   r   r   r   strr#  intrC   r   r   r   r*   r+   r   r   r   compilerdisabler   no_gradr   EXAMPLE_DOC_STRINGr   r	   r   r   staticmethodr$  r   __classcell__)rP   s   @r/   r1   r1   i   s    'R 6!7 *.*.*.157;-^"-^ -^ 45	-^
 $-^ !-^ "#-^ "$-^ "$-^ "*#-^ (0}-^^B!B !B 	B
  #B  #B !$B B $DcN3B %,,'B E%//43H"HIJB B !B 
BH ^^P P$ U]]_12 .2/3'+%/&06:EIMQ#(# !N
!N
 &c]N
 	N

  (}N
 !%N
  #N
 !$N
 N
 $DcN3N
 %d5<<.@ @ABN
 E%//43H"HIJN
 N
 !N
 N
  !N
 3 N
`#)||#) %,,'#) EOO,	#)
 #) #) 
u||U\\)	*#)JU\\ ell   !% $#(!&*Z"||Z"Z" Z" !	Z"
 Z" $Z" Z" Z" Z" 
u||Xell33	4Z" Z"r.   r1   )5dataclassesr   	functoolsr   typingr   r   r   r   r	   r
   r   r(   r*   PILr   	tqdm.autor   transformersr   r   rN   r   modelsr   r   
schedulersr   r   utilsr   r   r   r   utils.import_utilsr   utils.torch_utilsr   pipeline_utilsr   marigold_image_processingr   torch_xla.core.xla_modelcore	xla_modelr   r   
get_loggerr$   rE   r/  r    r1   r-   r.   r/   <module>rC     s   & "  : :     5 1  5 - . = ))MM			H	% , &* & &,S"- S"r.   