
    bie*                         d dl mZ d dlmZmZmZ d dlZd dlmZ ddl	m
Z
mZ ddlmZ ddlmZmZmZ dd	lmZ d
dlmZmZmZmZ e G d de             Z G d dee
      Zy)    )	dataclass)OptionalTupleUnionN   )ConfigMixinregister_to_config)
BaseOutput   )GaussianFourierProjectionTimestepEmbedding	Timesteps)
ModelMixin   )get_down_blockget_mid_blockget_out_blockget_up_blockc                   0    e Zd ZU dZej
                  ed<   y)UNet1DOutputz
    The output of [`UNet1DModel`].

    Args:
        sample (`torch.Tensor` of shape `(batch_size, num_channels, sample_size)`):
            The hidden states output from the last layer of the model.
    sampleN)__name__
__module____qualname____doc__torchTensor__annotations__     Y/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/models/unets/unet_1d.pyr   r      s     LLr    r   c            &           e Zd ZdZdgZe	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddedee   dedededed	e	d
e	de
dee   dee   dee   dedee   dededede	f$ fd       Z	 ddej                  deej                  e
ef   de	deeef   fdZ xZS )UNet1DModela  
    A 1D UNet model that takes a noisy sample and a timestep and returns a sample shaped output.

    This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented
    for all models (such as downloading or saving).

    Parameters:
        sample_size (`int`, *optional*): Default length of sample. Should be adaptable at runtime.
        in_channels (`int`, *optional*, defaults to 2): Number of channels in the input sample.
        out_channels (`int`, *optional*, defaults to 2): Number of channels in the output.
        extra_in_channels (`int`, *optional*, defaults to 0):
            Number of additional channels to be added to the input of the first down block. Useful for cases where the
            input data has more channels than what the model was initially designed for.
        time_embedding_type (`str`, *optional*, defaults to `"fourier"`): Type of time embedding to use.
        freq_shift (`float`, *optional*, defaults to 0.0): Frequency shift for Fourier time embedding.
        flip_sin_to_cos (`bool`, *optional*, defaults to `False`):
            Whether to flip sin to cos for Fourier time embedding.
        down_block_types (`Tuple[str]`, *optional*, defaults to `("DownBlock1DNoSkip", "DownBlock1D", "AttnDownBlock1D")`):
            Tuple of downsample block types.
        up_block_types (`Tuple[str]`, *optional*, defaults to `("AttnUpBlock1D", "UpBlock1D", "UpBlock1DNoSkip")`):
            Tuple of upsample block types.
        block_out_channels (`Tuple[int]`, *optional*, defaults to `(32, 32, 64)`):
            Tuple of block output channels.
        mid_block_type (`str`, *optional*, defaults to `"UNetMidBlock1D"`): Block type for middle of UNet.
        out_block_type (`str`, *optional*, defaults to `None`): Optional output processing block of UNet.
        act_fn (`str`, *optional*, defaults to `None`): Optional activation function in UNet blocks.
        norm_num_groups (`int`, *optional*, defaults to 8): The number of groups for normalization.
        layers_per_block (`int`, *optional*, defaults to 1): The number of layers per block.
        downsample_each_block (`int`, *optional*, defaults to `False`):
            Experimental feature for using a UNet without upsampling.
    normsample_sizesample_ratein_channelsout_channelsextra_in_channelstime_embedding_typeflip_sin_to_cosuse_timestep_embedding
freq_shiftdown_block_typesup_block_typesmid_block_typeout_block_typeblock_out_channelsact_fnnorm_num_groupslayers_per_blockdownsample_each_blockc           
      ,   t         !|           || _        |dk(  rt        ddd|      | _        d|d   z  }n |dk(  rt        |d   ||	      | _        |d   }|r|d   d	z  }t        |||d   
      | _        t        j                  g       | _
        d | _        t        j                  g       | _        d | _        |}t        |
      D ][  \  }}|}||   }|dk(  r||z  }|t        |      dz
  k(  }t!        |||||d   | xs |      }| j                  j#                  |       ] t%        ||d   |d   |d   |d   ||      | _        t'        t)        |            }|d   }||}n|d   }t        |      D ]e  \  }}|}|t        |      dz
  k  r||dz      n|}|t        |      dz
  k(  }t+        |||||d   |       }| j                  j#                  |       |}g ||nt-        |d   d	z  d      } t/        || |d   |||d   d	z        | _        y )Nfourier   F)embedding_sizeset_W_to_weightlogr+   r   r   
positional)r+   downscale_freq_shift   )r'   time_embed_dimr3   out_dimr   )
num_layersr'   r(   temb_channelsadd_downsample)r'   mid_channelsr(   	embed_dimrB   rD   )rB   r'   r(   rC   add_upsample    )r1   num_groups_outrG   r(   r3   fc_dim)super__init__r%   r   	time_projr   r   time_mlpnn
ModuleListdown_blocks	mid_block	up_blocks	out_block	enumeratelenr   appendr   listreversedr   minr   )"selfr%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   timestep_input_dimr@   output_channelidown_block_typeinput_channelis_final_block
down_blockreversed_block_out_channelsfinal_upsample_channelsup_block_typeprev_output_channelup_blockrJ   	__class__s"                                    r!   rM   zUNet1DModel.__init__L   s   , 	& )+6 %UTcDN "#%7%:!: L0&"1%]gDN "4A!6!/2Q6N-.-*1-	DM ==,r* %"+,<"= 	0A*M/2NAv!22#&8"9A"==N'+)+03#11J5JJ ##J/#	0( '*2.+B/+B/(+'0
 '+84F+G&H#4Q7!&2#&8&;# ). 9 	1A}"067#n:MPQ:Q6Q+AE2Wn  #&8"9A"==N#+/+03!//H NN!!(+"0#	1( -<,GSQcdeQfjkQkmoMp&))(+%%b)Q.
r    r   timestepreturn_dictreturnc                    |}t        j                  |      s2t        j                  |gt         j                  |j                        }nKt        j                  |      r6t        |j                        dk(  r|d   j                  |j                        }| j                  |      }| j                  j                  r+| j                  |j                  |j                              }nm|d   }|j                  dd|j                  d   g      j                  |j                        }|j                  |j                  dd |j                  dd z         }d}| j                  D ]  } |||      \  }}||z  } | j                   r| j!                  ||      }t#        | j$                        D ]  \  }	}
|d	d }|dd	 } |
|||
      } | j&                  r| j'                  ||      }|s|fS t)        |      S )a  
        The [`UNet1DModel`] forward method.

        Args:
            sample (`torch.Tensor`):
                The noisy input tensor with the following shape `(batch_size, num_channels, sample_size)`.
            timestep (`torch.Tensor` or `float` or `int`): The number of timesteps to denoise an input.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~models.unets.unet_1d.UNet1DOutput`] instead of a plain tuple.

        Returns:
            [`~models.unets.unet_1d.UNet1DOutput`] or `tuple`:
                If `return_dict` is True, an [`~models.unets.unet_1d.UNet1DOutput`] is returned, otherwise a `tuple` is
                returned where the first element is the sample tensor.
        )dtypedevicer   N).Nr   r   r   )hidden_statestembrE   )res_hidden_states_tuplerq   )r   )r   	is_tensortensorlongro   rW   shapetorN   configr,   rO   rn   repeatbroadcast_torR   rS   rV   rT   rU   r   )r\   r   rj   rk   	timestepstimestep_embeddown_block_res_samplesdownsample_blockres_samplesr_   upsample_blocks              r!   forwardzUNet1DModel.forward   s   . 	y)i[

6==YI__Y'C	,@A,E!$**6==9I	2;;--!]]>+<+<V\\+JKN+I6N+22Aq&,,q/3JKNNv||\N+88&,,r:J^MaMabcbdMe:egN "$ $ 0 0 	2"2n"]FK"k1"	2
 >>^^FN;F "+4>>!: 	fA~05K%;CR%@"#FKVdeF	f >>^^FN;F96**r    )i   Nr   r   r   r8   TFg        )DownBlock1DNoSkipDownBlock1DAttnDownBlock1D)AttnUpBlock1D	UpBlock1DUpBlock1DNoSkipUNetMidBlock1DN)rI   rI   @   Nr9   r   F)T)r   r   r   r    _skip_layerwise_casting_patternsr	   intr   strboolfloatr   rM   r   r   r   r   r   __classcell__)ri   s   @r!   r#   r#   )   s   @ )/x$ !%)!"#, $','^%V%5")5  !&+'v
v
 c]v
 	v

 v
 v
 !v
 v
 !%v
 v
  *v
 c
v
 c
v
 v
 "#Jv
  !v
" #v
$ %v
&  $'v
 v
x !	<+<+ eS01<+ 	<+
 
|U"	#<+r    r#   )dataclassesr   typingr   r   r   r   torch.nnrP   configuration_utilsr   r	   utilsr
   
embeddingsr   r   r   modeling_utilsr   unet_1d_blocksr   r   r   r   r   r#   r   r    r!   <module>r      sV    " ) )   B  P P ' V V 	: 	 	X+*k X+r    