
    pi$X                     "   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlZd dlZd dlZd dlZd dlmZ d dlZ ej2                  e      Z G d	 d
e      Z G d de      Z G d d      Zdej>                  iZ ddddddi fdZ!d(dZ"d)dZ#d Z$d Z%d Z&d Z'd*dZ(ejR                  d fdZ*d Z+d+dZ,de-e.e/ef      dz  fdZ0 G d  d!e      Z1 G d" d#e1      Z2 G d$ d%e1      Z3d,d&Z4d' Z5y)-    N)ABCabstractmethod)ThreadPoolExecutor)datetime)Enum)sleep)Any)versionc                   "    e Zd ZdZdZdZdZd Zy)	Precisionfp32fp16int8int4c                     | j                   S Nvalueselfs    k/opt/services/ai/voice_agent/venv/lib/python3.12/site-packages/onnxruntime/transformers/benchmark_helper.py__str__zPrecision.__str__&       zz    N)__name__
__module____qualname__FLOAT32FLOAT16INT8INT4r    r   r   r   r       s    GGDDr   r   c                       e Zd ZdZdZdZd Zy)OptimizerInfono_optby_ort	by_scriptc                     | j                   S r   r   r   s    r   r   zOptimizerInfo.__str__1   r   r   N)r   r   r   NOOPTBYORTBYSCRIPTr   r"   r   r   r$   r$   *   s     EEHr   r$   c                       e Zd Zd Zd Zd Zy)ConfigModifierc                     || _         y r   
num_layers)r   r0   s     r   __init__zConfigModifier.__init__6   s	    $r   c                    | j                   y t        |d      r3| j                   |_        t        j	                  d| j                           t        |d      r3| j                   |_        t        j	                  d| j                           t        |d      r4| j                   |_        t        j	                  d| j                           y y )Nnum_hidden_layersz6Modifying pytorch model's number of hidden layers to: encoder_layersz7Modifying pytorch model's number of encoder layers to: zdecoder_layers z7Modifying pytorch model's number of decoder layers to: )r0   hasattrr3   loggerinfor4   decoder_layers)r   configs     r   modifyzConfigModifier.modify9   s    ??"6./'+F$KKPQUQ`Q`Pabc6+,$(OOF!KKQRVRaRaQbcd6,-$(OOF!KKQRVRaRaQbcd .r   c                     | j                   S r   r/   r   s    r   get_layer_numzConfigModifier.get_layer_numF   s    r   N)r   r   r   r1   r:   r<   r"   r   r   r-   r-   5   s    %er   r-   float32TFc	                    t        j                         }	|r t         j                  j                  |	_        nt         j                  j
                  |	_        |rd|	_        |dkD  r)||	_        t        j                  d|	j                          |rd|	_
        nd|	_
        |t        j                         v r|g}
nG|rB|dk(  rddg}
n;|dk(  rd	dg}
n1|d
k(  rg d}
n'|dk(  s|ddg}
n|dk(  rg d}
nt        d|       dg}
|r|
D cg c]  }||v r|||   fn| }
}|r|	j                  dd       d }	 t        j                  | |	|
      }|S c c}w # t        $ r t        j!                  d|  d|
        Y |S w xY w)NTr   z%Session option: intra_op_num_threads=   dmlDmlExecutionProviderCPUExecutionProviderrocmROCMExecutionProvidermigraphx)MIGraphXExecutionProviderrE   rC   cudaCUDAExecutionProvidertensorrt)TensorrtExecutionProviderrI   rC   z)The execution provider is not supported: z(mlas.enable_gemm_fastmath_arm64_bfloat161)	providerszFailed to create session for z with providers=)onnxruntimeSessionOptionsGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_ENABLE_BASICenable_profilingintra_op_num_threadsr6   debuglog_severity_levelget_available_providersRuntimeErroradd_session_config_entryInferenceSession	Exception	exception)onnx_model_pathuse_gpuproviderenable_all_optimizationnum_threadsrT   verbose(enable_mlas_gemm_fastmath_arm64_bfloat16provider_optionssess_optionsrM   namesessions                r   create_onnxruntime_sessionri   P   s    --/L0;0R0R0a0a-0;0R0R0c0c-(,%Q,7)<\=^=^<_`a*+'*+';6688J		u/1GHI02HII#I
 8#302HII#I !J8*UVV+,	fop^bt?O7Od,T23UYYp	p/--.XZ]^Gg..Xab N q  g88IIYZcYdefNgs   E2E $E98E9c                     | rt        j                  dd       y t        j                  d       t        j                  d      j	                  t        j
                         y )NDEBUGz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)levelfmtz%(message)s)rm   transformers)coloredlogsinstalllogging	getLoggersetLevelWARNING)rc   s    r   setup_loggerru      sF    J	

 	..)227??Cr   c                    | r4t         j                  j                  |       st        j                  |        |r4t         j                  j                  |      st        j                  |       |rW|dk(  rdt	        j
                         v s<J d       t        t	        j
                               j                  g d      rJ d       t        j                  dt        j                          t        j                  dt        j                          t        j                  dt        j                          t        j                  t        j                        t        j                  d	      k\  sJ t        j                  t        j                        t        j                  d
      k\  sJ t        j                  t        j                        t        j                  d	      k\  sJ y )NrA   rB   zBPlease install onnxruntime-directml package to test GPU inference.)rI   rE   rG   zWPlease install onnxruntime-gpu package, or install ROCm support, to test GPU inference.zPyTorch Version:zTransformers Version:zOnnxRuntime Version:z1.10.0z4.12.0)ospathexistsmakedirsrN   rX   set
isdisjointr6   r7   torch__version__rn   r
   parse)	cache_dir
output_dirr_   r`   s       r   prepare_environmentr      sf   	2
I"''..4
Ju)[-P-P-RR TR
 ;>>@ALL_ ihi  KK"5#4#4"567
KK'(@(@'ABC
KK&{'>'>&?@A ==**+w}}X/FFFF==112gmmH6MMMM==001W]]85LLLLr   c                 p   t        |       t        t        |             z  dz  }t        j                  | t        j
                        dz  }|d|z  z  }t        |       |dt        j                  | d      dz  dt        j                  | d      dz  dt        j                  | d      dz  d|d|ddS )Ng     @@)dtypez.2fZ   _   c   )
test_timeslatency_variancelatency_90_percentilelatency_95_percentilelatency_99_percentileaverage_latency_msQPS)sumfloatlennumpyvarfloat64
percentile)latency_list
batch_size
latency_msr   
throughputs        r   get_latency_resultr      s    \"U3|+<%==FJyyU]]CfLv
23J ,'/4$)$4$4\2$F$OPS#T$)$4$4\2$F$OPS#T$)$4$4\2$F$OPS#T!+C 0S! r   c                    t        |ddd      5 }g d}t        j                  ||      }|j                          | D ]  }|j	                  |        	 d d d        t
        j                  d|        y # 1 sw Y   "xY w)Na asciimodenewlineencoding)enginer
   rM   device	precision	optimizer
io_binding
model_nameinputsthreadsr   sequence_lengthcustom_layer_numr   r   r   r   r   r   r   r   
fieldnamesz&Detail results are saved to csv file: )opencsv
DictWriterwriteheaderwriterowr6   r7   )resultscsv_filenamecsv_filecolumn_names
csv_writerresults         r   output_detailsr      s    	lb7	C (x
0 ^^HF
  	(F'	(7(< KK8GH=( (s   AA66A?c                    t        |ddd      5 }g d}g }|j                  D ]O  }|j                  dgk(  r|j                  d|        (|j                  D ]  }|j                  d| d|         Q t	        j
                  |||z         }|j                          |j                  D ]  }	d	D ]  }
|j                  D ]   }d
D ]  }|j                  D ]  }i }| D ]  }|d   |	k(  s|d   |
k(  s|d   |k(  s|d   |k(  s'|d   |k(  s0|j                         D ci c]  \  }}||v s|| }}}|s7|j                  |       |j                  t        j                  |d             n|D ]  }||   ||   k(  rJ  |d   }|d   }|r|d   |d| d| <   |d   |d| <    |s|j                  |            	 d d d        t        j!                  d|        y c c}}w # 1 sw Y   (xY w)Nr   r   r   r   )r   r   r   r   r
   rM   r   r   r   r   r   b_sr   )         )TFr   r   r   r   r   r   r   r   r   z'Summary results are saved to csv file: )r   batch_sizessequence_lengthsappendr   r   r   modelsenginesrb   itemsupdatedictfromkeysr   r6   r7   )r   r   argsr   header_names
data_namesr   r   r   r   input_countengine_namer   r   rowr   kvheadersr   ss                        r   output_summaryr      sh   	lb7	C 49x
 
** 	KJ$$,!!Aj\"23'+'<'< KO%%*R7H&IJK		K ^^H
9RS
 ++ 	9J( 9#'<< 9K&7 9
'+'7'7 9G"$C*1 T$*<$8J$F(.x(8K(G(.x(8K(G(.|(<
(J(.y(9W(D@F.d1RSWcRcq!t.dG.d+.(+

7(;(+

4==R3P(Q1= )HA36q6WQZ3G,G3G)H(.|(<A(./@(AA'(<BCW<Xas"QCL(97=>R7SasG)T*  # * 3 3C 819999	9149l KK9,HI! /eM49 49sO   CG3)G32G3;G3G3G3 G--G-2AG3?-G3-G3-G33G<c           
      .   t        |ddd      5 }ddddgt        t        t        | j	                                     j                               }t        j                  ||	      }|j                          | D ]m  }t        t        j                               | |   d<   t        j                  | |   d<   t        j                  | |   d<   || |   d<   |j                  | |          o 	 d d d        t         j#                  d
|        y # 1 sw Y   "xY w)Nr   r   r   r   model_filenamer   rn   r}   r   z(Fusion statistics is saved to csv file: )r   listnextitervalueskeysr   r   r   strr   nowrn   r~   r}   r   r6   r7   )model_fusion_statisticsr   r   r   r   keys         r   output_fusion_statisticsr   *  s   	lb7	C >x	

 $t3::<=>CCEF
 ^^HF
 * 	>C7:8<<>7J#C(4;G;S;S#C(8494E4E#C(1=@#C()9: 7 <=	>>  KK:<.IJ!> >s   CDDc                      i }t        j                   fdd|       t        j                   fdd|      }|j                  |       |j                  ddi       |j                  t        ||             |S )Nc                  (    j                  d        S r   run
ort_inputsort_sessions   r   <lambda>zinference_ort.<locals>.<lambda>@  s    +//$
; r   r   numberrepeatc                  (    j                  d        S r   r   r   s   r   r   zinference_ort.<locals>.<lambda>A  s    z)J r   r   F)timeitr   r   r   )r   r   result_templaterepeat_timesr   warm_up_repeatr   r   s   ``      r   inference_ortr   >  sd    F
MM;An]==!JST]ijL
MM/"
MM<'(
MM$\:>?Mr   c           
      b    i } j                         |D ]  }t        j                  ||         j                  |	      }t        j                  t        ||   j                        |
      }j                  ||j                  j                  d||j                  |j                                 t        |      dk(  rt        |||	       t        |      D ]^  \  }}j!                  |||   j                  j                  dt"        j$                  ||   j                  ||   j                                ` t'        j(                   fdd|       t'        j(                   fdd|      }|j+                  |       |j+                  ddi       |j+                  t-        ||             |S )Nr   c                  &    j                         S r   run_with_iobindingr   r   s   r   r   z/inference_ort_with_io_binding.<locals>.<lambda>u      ..z: r   r   r   c                  &    j                         S r   r   r   s   r   r   z/inference_ort_with_io_binding.<locals>.<lambda>{  r   r   r   T)r   r}   
from_numpytoIO_BINDING_DATA_TYPE_MAPgetr   r   
bind_inputr   typeshapedata_ptrr   allocateOutputBuffers	enumeratebind_outputr   r=   r   r   r   r   )r   r   r   r   ort_output_namesort_outputsoutput_buffersoutput_buffer_max_sizesr   r   	data_typer   r   rg   np_input
input_typeiort_output_namer   r   s   `                  @r   inference_ort_with_io_bindingr  H  s    F '')J 

##Jt$4588@-11#j6F6L6L2MyY
OO  NN	


 >an.EvN'(89 
?1$$))MMN  1&&(	

 MM: ==:L
 MM/"
MM<&'
MM$\:>?Mr   c                 |    |D ]7  }| j                  t        j                  |t        j                  |             9 y )N)r   r   )r   r}   emptyr=   )r  r  r   r  s       r   r  r    s4     % Rekk!5==PQRr   c                    t        j                  |        t        j                   j                  |        t        j                  |        t        j
                  j	                  |        t        j
                  j                  |        y)z5Set random seed manually to get deterministic resultsN)randomseedr   r}   manual_seedrH   manual_seed_all)r  s    r   set_random_seedr    sR    
KK	LLd	d	JJ4 	JJt$r   returnc            	         ddl m} m}m}m}m}m}m} 	  |        g } |       }t        |t              sy t        |      D ]c  }	 | ||	            }
t        |
t              r y |j                  |	 | ||	            |
j                  |
j                  |
j                  d       e  |        |S # | $ r}t!        d|       Y d }~y d }~ww xY w)Nr   	NVMLErrornvmlDeviceGetCountnvmlDeviceGetHandleByIndexnvmlDeviceGetMemoryInfonvmlDeviceGetNamenvmlInitnvmlShutdown)idrg   totalfreeused-Error fetching GPU information using nvml: %s)py3nvml.py3nvmlr  r  r  r  r  r  r  
isinstanceintranger   r   r!  r"  r#  print)r  r  r  r  r  r  r  r   device_countr  r7   errors               r   get_gpu_infor,    s      
)+,,|$ 	A*+Ea+HID$$MM-.H.KL!ZZ II II		 	 =uEs#    B/ -B/ #AB/ /C
4CC
c                   F    e Zd ZddZd Zedeeee	f      dz  fd       Z
y)MemoryMonitorc                     || _         y r   )keep_measuring)r   r0  s     r   r1   zMemoryMonitor.__init__  s
    ,r   c                     dd l }d}	 t        ||j                  t        j                               j                         j                  dz        }t        d       | j                  s	 |S c)Nr      {Gzt?)	psutilmaxProcessrw   getpidmemory_inforssr   r0  )r   r4  	max_usages      r   measure_cpu_usagezMemoryMonitor.measure_cpu_usage  s[    	Iv~~biik'B'N'N'P'T'TW^'^_I%L&& r   r  Nc                     t               r   )NotImplementedErrorr   s    r   measure_gpu_usagezMemoryMonitor.measure_gpu_usage  s    !##r   T)r   r   r   r1   r;  r   r   r   r   r	   r>  r"   r   r   r.  r.    s9    -	 $4S#X#7$#> $ $r   r.  c                   B     e Zd Zd fd	Zdeeeef      dz  fdZ xZ	S )CudaMemoryMonitorc                 $    t         |   |       y r   )superr1   )r   r0  	__class__s     r   r1   zCudaMemoryMonitor.__init__  s    (r   r  Nc                    ddl m}m}m}m}m}m}m} g }g }		  |         |       }
t        |
t              st        j                  d|
        y t        |
      D cg c]  }d }}t        |
      D cg c]  } | ||             }	}	 t        |
      D ]Y  } | ||            }t        |t              rt        j                  d|         y t        ||   |j                  dz        ||<   [ t!        d       | j"                  sn |        t        |
      D cg c]  }||	|   ||   d c}S c c}w c c}w c c}w # |$ r }t        j                  d|       Y d }~y d }~ww xY w)	Nr   r  z*nvmlDeviceGetCount result is not integer: z%nvmlDeviceGetMemoryInfo returns str: r2  r3  	device_idrg   max_used_MBr$  )r%  r  r  r  r  r  r  r  r&  r'  r6   r+  r(  r   r5  r#  r   r0  )r   r  r  r  r  r  r  r  max_gpu_usagegpu_namer*  r  r7   r+  s                 r   r>  z#CudaMemoryMonitor.measure_gpu_usage  s   	
 	
 	
 	J-/LlC0I,XY(-l(;<1Q<M<RWXdRefQ)*DQ*GHfHf|, RA23Ma3PQD!$,'LTF%ST#'*=+;TYY=P'QM!$R e**  N |, 	 "#$QK#0#3  =f  	LLH%P	sO   6E E 	D6%E 4D;	AE AE  E 3E 6E E*
E%%E*r?  )
r   r   r   r1   r   r   r   r	   r>  __classcell__rD  s   @r   rA  rA    s&    )+4S#X#7$#> +r   rA  c                   ,     e Zd Zd fd	Zd Zd Z xZS )RocmMemoryMonitorc                 @   t         |   |       d}t        j                  j	                  |      r1|t
        j                  vrt
        j                  j                  |       	 dd l}|| _        | j                  j                          y # t        $ r
 d | _        Y y w xY w)Nz/opt/rocm/libexec/rocm_smir   )
rC  r1   rw   rx   ry   sysr   rocm_smiinitializeRsmiImportError)r   r0  rocm_smi_pathrQ  rD  s       r   r1   zRocmMemoryMonitor.__init__  sv    (477>>-(CHH,.	!$DMMM((* 	! DM	!s   $%B
 
BBc                 f    | j                   y| j                   j                  |d      d   dz  dz  S )Nr>   VRAMr   i   )rQ  
getMemInfo)r   devs     r   get_used_memoryz!RocmMemoryMonitor.get_used_memory  s5    == }}''V4Q7$>EEr   c                    | j                   y | j                   #t        | j                   j                               nd}t        |      D cg c]  }d }}t        |      D cg c]  }d| 	 }}	 t        |      D ]#  }t	        ||   | j                  |            ||<   % t        j                  d       | j                  snTt        |      D cg c]  }|||   ||   d c}S c c}w c c}w c c}w )Nr   GPUr3  rF  )	rQ  r   listDevicesr(  r5  rY  timer   r0  )r   r*  r  rI  rJ  s        r   r>  z#RocmMemoryMonitor.measure_gpu_usage  s   == ;?==;Ts4==4467Z[$),$78q88',\':;!c!I;;<( R#&}Q'79M9Ma9P#Qa RJJu&&  <(
 	  ,Q/
 	
 9;
s   	C*$C/C4r?  )r   r   r   r1   rY  r>  rK  rL  s   @r   rN  rN    s    !F

r   rN  c                 |   d }|dk(  rt         }nt        } |d      }| r#||}n|j                         }|y ||S t               5 } |       }|j	                  |j                        }	 |j	                  |      }	|	j                         }
d|_        |j                         }|
	 d d d        y t        j                  d| d|        t        |      dk\  rct        |      dk\  rUt        |      t        |      k(  r>d}t        |      D ]#  \  }}|d   }||   d   }||z
  }t        ||      }% |cd d d        S d d d        y ||}n|j                         }||S t               5 } |       }|j	                  |j                        }	 |j	                  |      }	|	j                         }
d|_        |j                         }t        j                  d|d	d
|d	d       ||z
  cd d d        S # d|_        |j                         }w xY w# 1 sw Y   y xY w# d|_        |j                         }w xY w# 1 sw Y   y xY w)NrD   FzGPU memory usage: before=z  peak=r   r   rH  zCPU memory usage: before=z.1fz
 MB, peak=z MB)rN  rA  r>  r   submitr   r0  r6   r7   r   r  r5  r;  )is_gpufuncmonitor_typestart_memorymemory_monitor_typemonitormemory_before_testexecutor
mem_thread	fn_thread_r:  max_usedr  memory_beforebeforeafterr#  s                     r   measure_memoryro  0  s   v//!%(G#!-!(!:!:!<%<%%! 	 X)+G!)B)BCJ0$OOD1	$$&).&&--/	 	  	  KK34F3GwykZ[%&!+I!0CL^H_cfgpcqHq(12D(E 3$A}*=9F%aL7E 6>D"8T2H	3
  -	  	  	 .  )$668|!!		 .%'__W%>%>?
	, -I  "A%*G""))+I/0B3/GzR[\_Q``cde--. .7 */&&--/		 . $ &+G""))+I. .sO   #H
*!G.H
.BH
&#H2
!H+9H2.HH

HH//H22H;c                  r    g d} d}| D ]+  }t        j                  |      }||r|dz  }|| d| z  }- |S )N)ORT_DISABLE_FUSED_ATTENTION!ORT_ENABLE_FUSED_CAUSAL_ATTENTION!ORT_DISABLE_FUSED_CROSS_ATTENTIONORT_DISABLE_TRT_FLASH_ATTENTION&ORT_DISABLE_MEMORY_EFFICIENT_ATTENTIONORT_TRANSFORMER_OPTIONSORT_CUDA_GEMM_OPTIONSr   ,=)rw   getenv)	env_namesenvrg   r   s       r   get_ort_environment_variablesr}  t  s\    I C !		$=3JC$q  ! Jr   r?  r   )r   ){   )rH   N)6r   rq   rw   r  rP  r]  r   abcr   r   concurrent.futuresr   r   enumr   r   typingr	   ro   r   r}   rn   	packagingr
   rN   rr   r   r6   r   r$   r-   r=   r   ri   ru   r   r   r   r   r   r   longlongr  r  r  r   r   r   r,  r.  rA  rN  ro  r}  r"   r   r   <module>r     s=     	  
   # 1          			8	$ D  , u}}   -2DNDM8 ID7JtK(* nn:zR%#d4S>*T1 #L$C $(/ /d(
 (
VA.Hr   