
    piC                         d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlZd dl	Z	d dl
mZmZ eeeee   z  f   Z ej$                  e      Z G d d      Z G d d	      Z G d
 d      Z G d de      Z G d d      Zy)    N)OrderedDict)Mapping)Any)InferenceSession
RunOptionsc                       e Zd Zedededefd       Zededefd       Zedefd       Zedefd       Z	ed	e
j                  fd
       Zedej                  fd       Zededeee
j                  f   fd       Zy)
TypeHelperort_sessionnamereturnc                     t        | j                               D ]"  \  }}|j                  |k(  s|j                  c S  t	        d| d      )Nzinput name 
 not found)	enumerate
get_inputsr   type
ValueError)r
   r   _iinputs       l/opt/services/ai/voice_agent/venv/lib/python3.12/site-packages/onnxruntime/transformers/io_binding_helper.pyget_input_typezTypeHelper.get_input_type   sO    ";#9#9#;< 	"IBzzT!zz!	" ;tfJ788    c                     t        | j                               D ]"  \  }}|j                  |k(  s|j                  c S  t	        d| d      )Nzoutput name r   )r   get_outputsr   r   r   )r
   r   r   outputs       r   get_output_typezTypeHelper.get_output_type   sO    #K$;$;$=> 	#JB{{d"{{"	# <vZ899r   ort_typec                     t         j                  t         j                  t         j                  t         j                  t
        t         j                  d}| |vrt        |  d      ||    S )N)tensor(int64)tensor(int32)tensor(float)tensor(float16)tensor(bool)tensor(uint8) not found in map)numpylonglongintcfloat32float16booluint8r   )r   ort_type_to_numpy_type_maps     r   ort_type_to_numpy_typez!TypeHelper.ort_type_to_numpy_type"   s[     #^^"ZZ"]]$}} "[[&
" 55z):;<<)(33r   c                    t         j                  t         j                  t         j                  t         j                  t         j
                  t         j                  t         j                  d}| |vrt        |  d      ||    S )N)r   r   r    r!   ztensor(bfloat16)r"   r#   r$   )	torchint64int32r(   r)   bfloat16r*   r+   r   )r   ort_type_to_torch_type_maps     r   ort_type_to_torch_typez!TypeHelper.ort_type_to_torch_type1   sf     #[["[["]]$}} %!JJ"[[&
" 55z):;<<)(33r   
numpy_typec                    t         j                  t        j                  t         j                  t        j
                  t         j
                  t        j
                  t         j                  t        j                  t         j                  t        j                  t        t        j                  t         j                  t        j                  i}| |vrt        |  d      ||    S Nr$   )r%   r&   r/   r0   r'   r1   r(   r)   r*   r+   r   )r5   numpy_type_to_torch_type_maps     r   numpy_type_to_torch_typez#TypeHelper.numpy_type_to_torch_typeA   s     NNEKKJJKKMM5==MM5==%**KK(
$ 99
|+<=>>+J77r   
torch_typec                    t         j                  t        j                  t         j                  t        j
                  t         j                  t        j                  t         j                  t        j                  t         j                  t        t         j                  t        j                  i}| |vrt        |  d      ||    S r7   )r/   r0   r%   r&   r1   r'   r(   r)   r*   r+   r   )r:   torch_type_to_numpy_type_maps     r   torch_type_to_numpy_typez#TypeHelper.torch_type_to_numpy_typeQ   s     KKKKMM5==MM5==JJKK(
$ 99
|+<=>>+J77r   c                    i }| j                         D ].  }t        j                  |j                        ||j                  <   0 | j                         D ].  }t        j                  |j                        ||j                  <   0 |S )z:Create a mapping from input/output name to numpy data type)r   r	   r-   r   r   r   )r
   name_to_numpy_typer   r   s       r   get_io_numpy_type_mapz TypeHelper.get_io_numpy_type_map`   s       ++- 	[E-7-N-Nuzz-Zuzz*	[ "--/ 	]F.8.O.OPVP[P[.\v{{+	]!!r   N)__name__
__module____qualname__staticmethodr   strr   r   r-   r4   r%   dtyper9   r/   r=   dictr@    r   r   r	   r	      s    9$4 9C 9C 9 9 :3 :3 : : 4 4 4 4 4 4 8U[[ 8 8 8U[[ 8 8 "+; "S%++EU@V " "r   r	   c                       e Zd Zedefd       Ze	 d
dej                  dej                  dej                  deej                     fd       Z	edd	       Z
y)IOBindingHelperr
   c                     i }|j                         D ]^  \  }}t        j                  | |      }t        j                  |      }t	        j
                  t        j                  |      ||      ||<   ` |S )zpReturns a dictionary of output name as key, and 1D tensor as value. The tensor has enough space for given shape.)rF   device)itemsr	   r   r4   r/   emptyr%   prod)r
   output_shapesrL   output_buffersr   shaper   r:   s           r   get_output_buffersz"IOBindingHelper.get_output_buffersm   sr     (..0 	cKD%!11+tDH#::8DJ#(;;uzz%/@
[a#bN4 	c r   N	input_idsposition_idsattention_maskpastc                 8   |t         j                  |       }| j                         }|j                         sJ |j	                  d|j
                  j                  d|d   t        |j                               |j                                |t        |      D ]  \  }	}
|
j                         sJ |
j                         }|dk(  r|j                         }|j	                  d|	 |
j
                  j                  d|d|	    t        |
j                               |        |d|j                         sJ |j	                  d|j
                  j                  d|d   t        |j                               |j                                |d|j                         sJ |j	                  d|j
                  j                  d|d   t        |j                               |j                                | j                         D ]  }|j                  }||   }t        j                  | d|j
                  j                   dt        |j                                       |j                  ||j
                  j                  d||   ||   |j                                 |S )z)Returnas IO binding object for a session.rT   r   past_rV   rU   z device type=z shape=)r	   r@   
io_bindingis_contiguous
bind_inputrL   r   listsizedata_ptrr   r   r   loggerdebugbind_output)r
   rT   rU   rV   rW   rQ   rP   name_to_np_typerZ   ipast_ir_   r   output_nameoutput_buffers                  r   prepare_io_bindingz"IOBindingHelper.prepare_io_bindingw   s    "(>>{KO !++-
 &&(((!!K(!" 	
 &t_ 	6++---!??,q=  )113H%%A3KMM&&#eA3K0'$ %!//111!! %%** 01^((*+'') #--///!!##((/\&&()%%' "--/ 	F ++K*;7MLLK=m6J6J6O6O5PPWX\]j]o]o]qXrWstu""$$)),k*&&(		 r   c                 h   g }| j                         D ]  }|j                  }||   }||   }|dt        j                  |       j	                  |      j                         j                         }	|r.|j                  |	j                         j                                |j                  |	        |S )z3Copy results to cpu. Returns a list of numpy array.r   )	r   r   r%   rO   reshapeclonedetachappendcpu)
r
   rQ   rP   return_numpyort_outputsr   rf   bufferrR   copy_tensors
             r   "get_outputs_from_io_binding_bufferz2IOBindingHelper.get_outputs_from_io_binding_buffer   s     !--/ 	0F ++K#K0F!+.E UZZ%67??FLLNUUWK"";??#4#:#:#<="";/	0 r   N)T)rA   rB   rC   rD   r   rS   r/   Tensorr]   rh   rs   rH   r   r   rJ   rJ   l   s    (8    S<<S llS 	S
 5<< S Sj  r   rJ   c                       e Zd ZdZddedej                  fdZdedefdZ	d Z
d	ed
ej                  fdZdefdZddeeej                  f   dedefdZeddedededeeef   fd       Zy)CudaSessionzLInference Session with IO Binding for ONNX Runtime CUDA or TensorRT providerr
   rL   c                    || _         | j                   j                         D cg c]  }|j                   c}| _        | j                   j	                         D cg c]  }|j                   c}| _        t        j                  | j                         | _        | j                   j                         | _	        || _
        t               | _        t               | _        || _        i | _        y c c}w c c}w rt   )r
   r   r   input_namesr   output_namesr	   r@   io_name_to_numpy_typerZ   enable_cuda_graphr   input_tensorsoutput_tensorsrL   buffer_sharing)selfr
   rL   r|   r   r   s         r   __init__zCudaSession.__init__   s    &484D4D4O4O4QR5EJJR7;7G7G7S7S7UVVV[[V%/%E%EdFVFV%W"**557!2(])m /1 SVs   C-C2
input_namerf   c                     || j                   v sJ || j                  v sJ || j                  |<   || j                  |<   y rt   )ry   rz   r   )r   r   rf   s      r   set_buffer_sharingzCudaSession.set_buffer_sharing   sJ    T-----d/////*5J'+5K(r   c                     | ` | `| `y rt   )r}   r~   rZ   )r   s    r   __del__zCudaSession.__del__   s    Or   r   tensorc           	      l   |j                   j                  |j                   j                  nd}t        |j                        dk(  rdgnt	        |j                        }| j
                  j                  ||j                   j                  || j                  |   ||j                                || j                  v ry| j
                  j                  | j                  |   |j                   j                  || j                  |   ||j                                || j                  | j                  |   <   y y )Nr      )rL   indexlenrR   r]   rZ   r\   r   r{   r_   r   rb   r~   )r   r   r   	device_idtensor_shapes        r   bind_input_and_buffer_sharingz)CudaSession.bind_input_and_buffer_sharing   s   +1==+>+>+JFMM''PQ	!&,,/14s$v||:L""MM&&t,OO	
 4&&&OO''##D)""**40! >DD 3 3D 9: 'r   
shape_dictc                    | j                   r|j                         D ]  \  }}|| j                  v s|| j                  v r:t	        | j                  |   j
                        t	        |      k(  rRt        d      | j                  |   }t        j                  t	        |      t        j                  |            j                  | j                        }|| j                  |<   | j                  ||        |j                         D ]Q  \  }}|| j                  v s|| j                   v r/t	        | j                   |   j
                        t	        |      k(  rS|| j"                  v rb| j                  |   }t        j                  t	        |      t        j                  |            j                  | j                        }|| j                   |<   | j$                  j'                  ||j                  j(                  |j                  j*                  |j                  j*                  nd|t-        |j/                               |j1                                T y)z Allocate tensors for I/O Bindingz(Expect static input shape for cuda graph)rF   )rL   Nr   )r|   rM   ry   r}   tuplerR   RuntimeErrorr{   r/   rN   r	   r9   torL   r   rz   r~   r   rZ   rb   r   r   r]   r^   r_   )r   r   r   rR   numpy_dtyper   s         r   allocate_bufferszCudaSession.allocate_buffers  s   !!)//1 Ee4+++t111 !3!3D!9!?!?@E%LP$*+UVV"&"<"<T"BK"[[uZ=`=`al=mnqq#{{ r F 06D&&t,66tVDE &++- 	KD%t(((4...59L9LT9R9X9X3Y]bch]i3i4..."88>U5\9\9\]h9ijmm;; n  -3##D)++MM&&+1==+>+>+JFMM''PQ'OO%	r   N	feed_dictrun_optionssynchronizec                 <   |j                         D ]  \  }}t        |t        j                        r|j	                         sJ || j
                  v sA| j                  r| j                  |   j                         |j                         k(  sJ | j                  |   j                  |j                  k(  sJ |j                  j                  dk(  sJ | j                  |   j                  |       | j                  ||        |rf| j                  j                          | j                   j#                  | j                  |       | j                  j%                          | j&                  S | j                   j#                  | j                  |       | j&                  S )z$Bind input tensors and run inferencecuda)rM   
isinstancer/   ru   r[   ry   r|   r}   nelementrF   rL   r   copy_r   rZ   synchronize_inputsr
   run_with_iobindingsynchronize_outputsr~   )r   r   r   r   r   r   s         r   inferzCudaSession.infer<  sV   %OO- 		ELD&fell38L8L8NNNt'''))--d3<<>&//BSSSS--d399V\\III!==--777&&t,226:66tVD		E OO..0//MOO//1 """ //M"""r   r   r|   streamr   c                 8    | d|d}|dk7  rt        |      |d<   |S )NkSameAsRequested)r   arena_extend_strategyr|   r   user_compute_stream)rE   )r   r|   r   optionss       r   get_cuda_provider_optionsz%CudaSession.get_cuda_provider_optionsR  s1     #%7!2
 Q;-0[G)*r   F)NT)r   )rA   rB   rC   __doc__r   r/   rL   r   rE   r   r   ru   r   	ShapeDictr   rG   r   r*   r   rD   intr   r   rH   r   r   rw   rw      s    V1$4 1ell 16S 6s 6
D# Du|| D0(9 (T#tC$56 #Z #ei #, S T SV _cdgildl_m  r   rw   c                        e Zd Z	 	 	 	 ddedej
                  dedededede	e
e
f   dz  f fd	Zdd
edefdZdde	e
ej                  f   d
ef fdZ xZS )
GpuBindingNr
   rL   r   enable_gpu_graphgpu_graph_idr   r   c                    t         
|   |||       |r*|j                         D ]  \  }}	| j                  ||	        | j	                  |       || _        |rt        j                  |      nd | _        || _	        d | _
        y rt   )superr   rM   r   r   r   copydeepcopyr   r   last_run_gpu_graph_id)r   r
   rL   r   r   r   r   r   r   rf   	__class__s             r   r   zGpuBinding.__init__b  s     	f.>?+9+?+?+A A'
K''
K@A 	j)(7G$--
3T%)"r   disable_cuda_graph_in_runr   c                 ~    t               }|rdn| j                  }|j                  dt        |             || _        |S )Nr   )r   r   add_run_config_entryrE   r   )r   r   r   r   s       r   get_run_optionszGpuBinding.get_run_optionsy  s:    ,6rD<M<M$$^S5FG%1"r   r   c                     | j                  |      }| j                  r|j                  dd       t        |   ||      S )N'disable_synchronize_execution_providers1)r   r   r   r   r   )r   r   r   r   r   s       r   r   zGpuBinding.infer  s>    **+DE;;,,-VX[\w}Y44r   )Fr   r   Nr   )rA   rB   rC   r   r/   rL   r   r*   r   rG   rE   r   r   r   ru   r   __classcell__)r   s   @r   r   r   a  s     "'04*%* * 	*
 * * * S#X-*.	 	* 	5tC$56 5SW 5 5r   r   c            	       l    e Zd ZdZddedej                  dedefdZ	 	 dde	d	e
d
eeef   dz  defdZy)GpuBindingManagerzA manager for I/O bindings that support multiple CUDA Graphs.
    One cuda graph is reused for same input shape. Automatically add a new cuda graph for new input shape.
    r
   rL   r   max_cuda_graphsc                 X    || _         || _        g | _        d | _        || _        || _        y rt   )r
   rL   graph_bindingsno_graph_bindingr   r   )r   r
   rL   r   r   s        r   r   zGpuBindingManager.__init__  s4    & ! !%.r   Nr   use_cuda_graphr   r   c           	      6   | j                   D ]  }|j                  |k(  s|c S  t        | j                         | j                  k\  s|sr| j                  ?t        | j                  | j                  || j                  |      | _        | j                  S | j                  j                  |       | j                  S t        | j                  | j                  |dt        | j                         | j                  |      }| j                   j                  |       |S )N)r   r   T)r   r   r   r   )r   r   r   r   r   r   r
   rL   r   r   rm   )r   r   r   r   gpu_graph_bindings        r   get_bindingzGpuBindingManager.get_binding  s    "&!4!4 	) ++z9((	) t""#t';';;N$$,(2$$dkk:dkkbp)%
 ((( %%66zB((( 'KK!T001;;)
 	""#45  r   )r   r   )FN)rA   rB   rC   r   r   r/   rL   r   r   r   r*   rG   rE   r   r   rH   r   r   r   r     so    /$4 /ell /TW /nq /"  %04	 ! !  ! S#X-	 !
 
 !r   r   )r   loggingcollectionsr   collections.abcr   typingr   r%   r/   onnxruntimer   r   rE   r   r]   r   r   	getLoggerrA   r`   r	   rJ   rw   r   r   rH   r   r   <module>r      s      # #    4 Cc**+				8	$W" W"tn nbA AH)5 )5X3! 3!r   