
    uki
                        d dl mZ d dlmc mZ d dlmc mc mZ d dl	Z
d dlmZ ddlmZ ddlmZ  edd      Z ed	d      Zer]ej'                         j)                         D ]0  \  ZZej/                  d
      rdnd Z ej2                  eede       2 ej4                  Zer]ej'                         j)                         D ]0  \  ZZej/                  d
      rdnd Z ej2                  eede       2 ej4                  ZdedededededefdZ eeed      Z eeed      Zd Z dedededededefdZ! ee!ed      Z" ee!ed      Z#y)    )partialN)
xla_client   )GpuLibNotLinkedError)import_from_plugincuda_rnnrocm_ffiCUDA)platformapi_versionROCM
input_sizehidden_size
num_layersdropoutbidirectionalcudnn_allow_tf32c                4   |j                   d   j                  }|t        j                  k(  rt        j
                  j                         }n|t        j                  k(  rt        j                  j                         }n|t        j                  k(  r<t        j                  j                  t        j
                  j                               }n]|t        j                  k(  r<t        j                  j                  t        j                  j                               }nt        d|       t        j                  j                  |j                   d   j                  |      }|j                  d   j                  d   }|j                  d   j                  d   }t!        ||	|
|||||      \  }}|f}t        j                  j                  |t        j
                  j                               }|j                   d   j                  }t        j                  j                  |t        j
                  j                               }| s
t#               | j%                  ||	|
||||||d   |d   
      }t        j&                  j)                  d      }t+        j,                  ||j.                  |j.                  ||g|||||gt        j0                  j                  | d      t        j2                  j                  d      t        j4                  j                  dt        j0                  j                  |      i      t        j6                  j                  |d	      t        j8                  j                  g       
      }|j:                  dd |j:                  dd z   S )z
CuDnn RNN.r   zUnknown output type r          dnn_rnn_ffiFopaque   )call_target_namehas_side_effectbackend_configr   called_computationsN)	avals_outdtypenpfloat32irF32Typegetfloat64F64Type	complex64ComplexType
complex128
ValueErrorRankedTensorTypeshapeavals_in)compute_rnn_workspace_reserve_space_sizesr   build_rnn_descriptorIntegerTypeget_signlesshloCustomCallOptype
StringAttrBoolAttrDictAttrIntegerAttr	ArrayAttrresults)r	   r   ctxinputh_0c_0weightsseq_lengthsr   r   r   r   r   r   	out_dtypeout_typeoutput_type
batch_sizemax_seq_lengthworkspace_size_workspace_shapeworkspace_typereserve_space_shapereserve_space_typer   i32_typeouts                               I/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jaxlib/gpu_rnn.py_rnn_fwd_loweringrS   ,   s   
 mmA$$)"**zz~~HBJJzz~~HBLL ~~!!"**.."23HBMM!~~!!"**.."23H
+I;7
88##''a(8(>(>I+||A$$Q'*<<?((+.?+z:~}.0.! $%/&&**?BJJNN<LM.a(..**../B/1zz~~/?A	

  $$Zj%/%24D%4Q%7%8%;	=& ^^((,(CHHchh8JKc3-}}((H:[)ABkkooe,[[__h0A0A&0I%JK..$$Xq1,,**2.	# 
Sb	CKK,	,,    cuhipc                     t        j                  t        j                  j	                  t        j                  | t
        j                        t        j                  j	                                     S )N)r#   )r8   )	r6   constantr&   DenseElementsAttrr(   r$   zerosr%   r'   )r0   s    rR   _hlo_zeros_f32r[   b   sN    	
((5


+"**..2B  D
E ErT   c                   |j                   d   j                  d   }|j                   d   j                  d   }t        ||||||||      \  }}|f}t        j                  j                  |t        j                  j                               }|j                   d   j                  }| t        d      | j                  |||||||||d   |d   
      }t        j                  j                  d      }t        |j                  d   j                        }t        j                  |j                  |j                  |j                  |	j                  |g|||||||	|
|||gt        j                   j                  | d      t        j"                  j                  d	      t        j$                  j                  d
t        j                   j                  |      i      t        j&                  j                  |d      t        j(                  j                  g       t        j(                  j                  t        j*                  j                  dgdg       g            }|j,                  dd S )zCuDnn RNN Backward pass.r   r   r      Nzcuda couldn't be importedr   dnn_rnn_bwd_ffiFr   r   	   )output_tuple_indicesoperand_indexoperand_tuple_indices)r   r   r   r   r   output_operand_aliasesr!   )r1   r0   r2   r&   r/   r(   r'   RuntimeErrorr3   r4   r5   r[   r"   r6   r7   r8   r9   r:   r;   r<   r=   OutputOperandAliasr>   )r	   r   r?   dydhndcnxh0c0wyreserve_spacerD   r   r   r   r   r   r   rH   rI   rJ   rK   rL   rM   rN   r   rP   	zeroed_dwrQ   s                                 rR   _rnn_bwd_loweringrp   h   s   
 ||A$$Q'*<<?((+.?+z:~}.0.! $%/&&**?BJJNN<LM.Q--	\
2
33$$Zj%/%24D%4Q%7%8%;	=& ^^((,(S]]1-334)vvrww8
c32r1a	
; }}((H:_)EFkkooe,[[__h0A0A&0I%JK..$$Xq1,,**2.\\--

 
 
$
$$%3$& % (/ 		
#  
Sb	rT   )$	functoolsr   jaxlib.mlir.irmlirr&   jaxlib.mlir.dialects.stablehlodialects	stablehlor6   numpyr$   jaxlibr   gpu_common_utilsr   plugin_supportr   	_cuda_rnn_hip_rnnregistrationsitems_name_valueendswithr   register_custom_call_targetr2   intboolrS   cudnn_rnn_loweringmiopen_rnn_loweringr[   rp   cudnn_rnn_bwd_loweringmiopen_rnn_bwd_lowering rT   rR   <module>r      s      , ,   2 .vv.	ff- ..0668 DmeV~~f-!1K*J**5&67BDD /8.a.a+--/557 DmeV~~f-!1K*J**5&67BDD /7.`.`+0-#&0-580-FI0- $0-590- *.0-d .	4@ /5A E(FI((+(9<(GK( +/( CG(T !!2ItD !"3XuE rT   