
    bi                     b    d dl Zd dlZd dlZddlmZ ddlmZ ddlm	Z	 ddl
mZ  G d de      Zy)	    N   )UNet1DModel)DiffusionPipeline)DDPMScheduler)randn_tensorc                   V     e Zd ZdZdededef fdZd Zd Zd Z	d	 Z
d
 ZddZ xZS )ValueGuidedRLPipelineal  
    Pipeline for value-guided sampling from a diffusion model trained to predict sequences of states.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
    implemented for all pipelines (downloading, saving, running on a particular device, etc.).

    Parameters:
        value_function ([`UNet1DModel`]):
            A specialized UNet for fine-tuning trajectories base on reward.
        unet ([`UNet1DModel`]):
            UNet architecture to denoise the encoded trajectories.
        scheduler ([`SchedulerMixin`]):
            A scheduler to be used in combination with `unet` to denoise the encoded trajectories. Default for this
            application is [`DDPMScheduler`].
        env ():
            An environment following the OpenAI gym API to act in. For now only Hopper has pretrained models.
    value_functionunet	schedulerc                 N   t         |           | j                  ||||       |j                         | _        i | _        | j                  j                         D ]-  }	 | j                  |   j                         | j
                  |<   / i | _        | j                  j                         D ]-  }	 | j                  |   j                         | j                  |<   / |j                  j                  d   | _        |j                  j                  d   | _        y #  Y xY w#  Y xxY w)N)r
   r   r   envr   )super__init__register_modulesget_datasetdatameanskeysmeanstdsstdobservation_spaceshape	state_dimaction_space
action_dim)selfr
   r   r   r   key	__class__s         j/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/experimental/rl/value_guided_sampling.pyr   zValueGuidedRLPipeline.__init__,   s    	^$R[adeOO%	
99>># 	C"&))C."5"5"7

3	
 	99>># 	C!%3!3!3!5		#	
 ..44Q7**003
s   *D0*D D D$c                 F    || j                   |   z
  | j                  |   z  S N)r   r   r   x_inr   s      r!   	normalizezValueGuidedRLPipeline.normalizeG   s"    tzz#&$))C.88    c                 F    || j                   |   z  | j                  |   z   S r#   )r   r   r$   s      r!   de_normalizez"ValueGuidedRLPipeline.de_normalizeJ   s"    diin$tzz#66r'   c                 ^   t        |t              r3|j                         D ci c]  \  }}|| j                  |       c}}S t	        j
                  |      r%|j                  | j                  j                        S t	        j                  || j                  j                        S c c}}w )Ndevice)

isinstancedictitemsto_torchtorch	is_tensortor   r,   tensor)r   r%   kvs       r!   r0   zValueGuidedRLPipeline.to_torchM   s{    dD!48JJLADAqAt}}Q''AA__T"77499++,,||D)9)9:: Bs   B)c                 j    |j                         D ]  \  }}|j                         |d d ||d f<   ! |S r#   )r/   clone)r   r%   condact_dimr   vals         r!   reset_x0zValueGuidedRLPipeline.reset_x0T   s9    

 	1HC%(YY[DC!"	1r'   c           
      \   |j                   d   }d }t        j                  | j                  j                        D ]  }t	        j
                  |f|| j                  j                  t        j                        }t        |      D ]  }	t	        j                         5  |j                          | j                  |j                  ddd      |      j                  }t        j                  j!                  |j#                         g|g      d   }
| j                  j%                  |      }t	        j&                  d|z        }||
z  }
d d d        d
|dk  <   |j)                         }|||
z  z   }| j+                  ||| j,                        } | j                  |j                  ddd      |      j                  j                  ddd      }| j                  j/                  |||      d   }| j+                  ||| j,                        }| j1                  |      } ||fS # 1 sw Y   xY w)Nr   )r,   dtype      g      ?prev_sample)r   tqdmr   	timestepsr1   fullr   r,   longrangeenable_gradrequires_grad_r
   permutesampleautogradgradsum_get_varianceexpdetachr<   r   stepr0   )r   x
conditionsn_guide_stepsscale
batch_sizeyirC   _rL   posterior_variance	model_stdprev_xs                 r!   run_diffusionz#ValueGuidedRLPipeline.run_diffusionY   s   WWQZ
4>>334 	!A

J=!DII<L<LTYT^T^_I=) B&&( 	,$$& ++AIIaA,>	JQQA >>..y1#>qAD)-)E)Ea)H& %		#0B*B CI$t+D	, '(Y]#HHJ$MM!ZAB" YYqyyAq19=DDLLQPQSTUF ##FAq1-@A aT__=Aa A7	!8 !t1	, 	,s   B)H""H+c                    | j                  |d      }|d    j                  |d      }d| j                  |      i}||| j                  | j                  z   f}t        || j                  j                        }| j                  ||| j                        }	| j                  |	      }	| j                  |	|||      \  }	}
|
j                  dd      j                         }|	|   }|d d d d d | j                  f   }|j                         j                         j                         }| j                  |d      }|
d}n t         j"                  j%                  d|      }||df   }|S )	Nobservationsr   )axisr+   T)
descendingactions)r   )r&   repeatr0   r   r   r   r   r,   r<   r]   argsortsqueezerP   cpunumpyr)   nprandomrandint)r   obsrV   planning_horizonrT   rU   rS   r   x1rR   rW   
sorted_idxsorted_valuesrb   denorm_actionsselected_indexs                   r!   __call__zValueGuidedRLPipeline.__call__z   s]   nnS.1$iz2s+,
-t~~/OP %		(8(89MM"j$//:MM! !!!ZF1 YYqTY2::<
*1&7&7 78.."&&(..0**7	*B =N  YY..q*=N'(9:r'   )@       r?   g?)__name__
__module____qualname____doc__r   r   r   r&   r)   r0   r<   r]   rr   __classcell__)r    s   @r!   r	   r	      sH    $4#4 4 !	4697;
Br'   r	   )rg   rh   r1   rB   models.unets.unet_1dr   	pipelinesr   utils.dummy_pt_objectsr   utils.torch_utilsr   r	    r'   r!   <module>r      s+       / * 3 -@- @r'   