
    pi3                         d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	Z	ddl
Z	ddlmZmZ  ej                  d      Z G d d	      Zy)
zClass for ONNX model.    N)deque)Path   )MAXIMUM_PROTOBUFfind_by_nameneural_compressorc                      e Zd ZdZd Zd Zed        Zed        Zej                  d        Zd Z
ed        Zej                  d	        Zed
        Zed        Zej                  d        Zd Zd Zd Zed        Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d  Z!d! Z"d" Z#dLd#Z$ed$        Z%d% Z&ed&        Z'd' Z(d( Z)dMd*Z*dMd+Z+dMd,Z,d- Z-d. Z.d/ Z/dLd0Z0e1d1        Z2dNd2Z3e1d3        Z4dNd4Z5d5 Z6dLd6Z7dMd7Z8d8 Z9dLd9Z:d: Z;d; Z<d< Z=d= Z>dMd>Z?	 	 	 	 dOd?Z@	 	 dNd@ZAdA ZBdB ZC	 dPdCZDdD ZEdE ZFdF ZGdG ZHdMdHZIdQdIZJdJ ZKdK ZLy))R	ONNXModelzBuild ONNX model.c                    t        |t              s|nt        j                  |d      | _        t        |t              sdn|| _        | j                          | j                  r3| j
                  '|j                  dd      st        j                  d       | j                  rst        |t              rc|j                  dd      rQt        j                  j                  | j                  t        j                  j                  | j
                               d| _        t        |t              rt        j                  j#                  t%        |      j&                  j)                  d      j+                               r=d	d
lm} |j1                  t%        |      j&                  j+                               | _        i | _        i | _        i | _        | j9                  | j                  j:                  j<                         | j?                  | j                  j:                  j<                         i | _         | jC                          d| _"        y)a;  Initialize an ONNX model.

        Args:
            model (str or ModelProto): path to onnx model or loaded ModelProto model object.
            ignore_warning (bool): ignore large model warning. Default is False.
            load_external_data (bool): load external data for large model. Default is True.
        F)load_external_dataNignore_warningzPModel size > 2GB. Please use model path instead of onnx model object to quantizer   Tconfig.jsonr   )
AutoConfig)#
isinstancestronnxload_model_model_pathcheck_is_large_model_is_large_modelgetloggerwarningexternal_data_helperload_external_data_for_modelospathdirname_configexistsr   parentjoinpathas_posixtransformersr   from_pretrainednode_name_counter_output_name_to_node_input_name_to_nodes_get_input_name_to_nodesgraphnode_get_output_name_to_node_graph_info_get_graph_info	_q_config)selfmodelkwargsr   s       w/opt/services/ai/voice_agent/venv/lib/python3.12/site-packages/onnxruntime/quantization/neural_compressor/onnx_model.py__init__zONNXModel.__init__)   s    $.eS#9etyych?i'1%'=45!!#D$4$4$<VZZP`bgEhNNmnJuc$:vzzJ^`d?e%%BB4;;PRPWPWP_P_`d`p`pPqreS!bggnnT%[5G5G5P5PQ^5_5h5h5j&k/%55d5k6H6H6Q6Q6STDL!#$&!$&!%%dkk&7&7&<&<=%%dkk&7&7&<&<=    c                    d}| j                   j                  j                  D ]~  }|j                  d      r0|j                  t
        j                  j                  k(  r	d| _         y	 |j                         }|t        j                  |      z  }|t        kD  swd| _         y d| _        y# t        $ r!}dt        |      v rd| _        Y d}~ y|d}~ww xY w)zCheck model > 2GB.r   data_locationTNz$exceeds maximum protobuf size of 2GBF)r   r+   initializerHasFieldr8   r   TensorProtoEXTERNALr   SerializeToStringsys	getsizeof	Exceptionr   r   )r1   	init_sizeinit
init_byteses        r4   r   zONNXModel.check_is_large_modelJ   s    	KK%%11 	D}}_-$2D2DHXHXHaHa2a'+$!335
S]]:66	 ++'+$#	$  %  9SVC+/D(Gs   ((B,,	C5CCCc                     | j                   S )z!Check the onnx model is over 2GB.)r   r1   s    r4   is_large_modelzONNXModel.is_large_modela   s     ###r6   c                     | j                   S )zReturn model path.r   rF   s    r4   
model_pathzONNXModel.model_pathf        r6   c                     || _         y)zSet model path.NrI   )r1   r   s     r4   rJ   zONNXModel.model_pathk   s      r6   c                      y)zReturn framework.onnxruntime rF   s    r4   	frameworkzONNXModel.frameworkp   s    r6   c                     | j                   S )zReturn q_config.r0   rF   s    r4   q_configzONNXModel.q_configt   s     ~~r6   c                     || _         y)zSet q_config.NrR   )r1   rS   s     r4   rS   zONNXModel.q_configy   s     "r6   c                     | j                   S )z8Return huggingface config if model is Transformer-based.)r    rF   s    r4   	hf_configzONNXModel.hf_config~   s     ||r6   c                     | j                   S )zReturn model itself.)r   rF   s    r4   r2   zONNXModel.model   s     {{r6   c                    || _         i | _        | j                          i | _        i | _        | j                  | j                   j                  j                         | j                  | j                   j                  j                         y)zSet model itself.N)	r   r.   r/   r(   r)   r*   r+   r,   r-   )r1   r2   s     r4   r2   zONNXModel.model   sk     $&!$&!%%dkk&7&7&<&<=%%dkk&7&7&<&<=r6   c                 |    | j                   j                  j                  D cg c]  }|j                   c}S c c}w )zReturn input of model.)r   r+   inputnamer1   is     r4   rZ   zONNXModel.input   s*     $ 1 1 7 781888   9c                 |    | j                   j                  j                  D cg c]  }|j                   c}S c c}w )zReturn output of model.)r   r+   outputr[   r\   s     r4   r`   zONNXModel.output   s*     $ 1 1 8 891999r^   c                 
   i | _         | j                          i | _        i | _        | j	                  | j
                  j                  j                         | j                  | j
                  j                  j                         y)zUpdate model info.N)	r.   r/   r(   r)   r*   r   r+   r,   r-   rF   s    r4   updatezONNXModel.update   sb    $&!$&!%%dkk&7&7&<&<=%%dkk&7&7&<&<=r6   c                     | j                   S )zEReturn ORT Graph Info object holding information about backend graph.)r.   rF   s    r4   
graph_infozONNXModel.graph_info   rK   r6   c                     | j                   j                  j                  D ]3  }| j                  j	                  |j
                  |j                  i       5 y)zUpdate graph info.N)r   r+   r,   rd   rb   r[   op_typer1   r,   s     r4   r/   zONNXModel._get_graph_info   s@    KK%%** 	>DOO""DIIt||#<=	>r6   c           	         t         j                  j                  |      d   dk7  rJt         j                  j                  t         j                  j                  |      d         st	        d      | j
                  rt        j                  j                  | j                  t         j                  j                  | j                        d          t        j                  | j                  |dd|j                  d      d   dz   dd	
       n t        j                  | j                  |       | j                  t        | j                  d      sdn| j                  j                  }|| j                  j                   _        t#        |      j$                  j'                  d      j)                         }| j                  j+                  |d	       yy)zSave ONNX model.r    z!"root" directory does not exists.T/_data   Fsave_as_external_dataall_tensors_to_one_filelocationsize_thresholdconvert_attributeN
model_typer   )use_diff)r   r   splitr!   
ValueErrorrG   r   r   r   r   r   
save_modelsaver    hasattrrt   	__class__r   r"   r#   r$   to_json_file)r1   rootrt   output_config_files       r4   ry   zONNXModel.save   sR   77==q!R'rww}}T?RST?U0V@AA%%BB4;;PRPWPWP]P]^b^n^nPopqPrsOO&*(,C,w6#"' IIdkk4(<<##*4<<#FDLLLcLcJ0:DLL""-!%d!2!2!;!;M!J!S!S!ULL%%&85%I	 $r6   c                 B    | j                   j                  j                  S )zReturn model nodes.)r   r+   r,   rF   s    r4   nodeszONNXModel.nodes   s    {{  %%%r6   c                 B    | j                   j                  j                  S )zReturn model initializer.)r   r+   r9   rF   s    r4   r9   zONNXModel.initializer   s    {{  ,,,r6   c                 .    | j                   j                  S )zReturn model graph.)r   r+   rF   s    r4   r+   zONNXModel.graph   s    {{   r6   c                 .    | j                   j                  S )zReturn model ir_version.)r   
ir_versionrF   s    r4   r   zONNXModel.ir_version   s    {{%%%r6   c                 .    | j                   j                  S )zReturn model opset_import.)r   opset_importrF   s    r4   r   zONNXModel.opset_import   s    {{'''r6   c                     || j                   j                  j                  v r0| j                   j                  j                  j                  |       yy)zRemove a node from model.N)r   r+   r,   removerg   s     r4   remove_nodezONNXModel.remove_node   s=    4;;$$)))KK""))$/ *r6   c                 4    |D ]  }| j                  |        y)zRemove nodes from model.N)r   )r1   nodes_to_remover,   s      r4   remove_nodeszONNXModel.remove_nodes   s    # 	#DT"	#r6   c                 d    | j                   j                  j                  j                  |g       y)zAdd a node to model.Nr   r+   r,   extendrg   s     r4   add_nodezONNXModel.add_node   s"    %%tf-r6   c                 b    | j                   j                  j                  j                  |       y)zAdd nodes to model.Nr   )r1   nodes_to_adds     r4   	add_nodeszONNXModel.add_nodes   s     %%l3r6   c                     t        |j                  | j                  j                  j                        1| j                  j                  j                  j                  |g       yy)zAdd a initializer to model.N)r   r[   r   r+   r9   r   r1   tensors     r4   add_initializerzONNXModel.add_initializer   sI    T[[%6%6%B%BCKKK))00&: Lr6   c                 4    |D ]  }| j                  |        y)zAdd initializers to model.N)r   )r1   tensorsr   s      r4   add_initializerszONNXModel.add_initializers   s     	)F  (	)r6   c                 t    | j                   j                  j                  D ]  }|j                  |k(  s|c S  y)zGet an initializer by name.N)r   r+   r9   r[   )r1   r[   r   s      r4   get_initializerzONNXModel.get_initializer   s7    kk''33 	F{{d"	 r6   c                     d}| j                  |      |S | j                         D ]  }||j                  v s|dz  } |S )z(Get the number of shares of initializer.r   r   )r   r   rZ   )r1   r[   numr,   s       r4   get_initializer_share_numz#ONNXModel.get_initializer_share_num   sN    %-JJJL 	Dtzz!q	 
r6   c                 t    | j                   j                  j                  D ]  }|j                  |k(  s|c S  y)zGet a node by name.N)r   r+   r,   r[   )r1   r[   r,   s      r4   get_nodezONNXModel.get_node  s7    KK%%** 	DyyD 	 r6   c                     || j                   j                  j                  v r0| j                   j                  j                  j                  |       yy)z!Remove an initializer from model.N)r   r+   r9   r   r   s     r4   remove_initializerzONNXModel.remove_initializer  s=    T[[&&222KK))008 3r6   c                 4    |D ]  }| j                  |        y)zRemove initializers from model.N)r   )r1   init_to_remover9   s      r4   remove_initializerszONNXModel.remove_initializers  s    ) 	1K##K0	1r6   c                 ~   | j                  |      }| j                  |       |j                  }|j                  }|s>t        j
                  j                  ||||j                         j                               n1t        j
                  j                  ||||j                         |      }| j                  |       y)zUpdate initializer.)rawN)r   r   dims	data_typer   helpermake_tensorflattentolisttostringr   )r1   r   arrayr   
old_tensorr   r   
new_tensors           r4   set_initializerzONNXModel.set_initializer  s    ))&1

+((	  KK##FItU]]_=S=S=UV((D%..BRX[(\ 	
 	Z(r6   c                     | j                   S )zReturn input names of nodes.)r)   rF   s    r4   input_name_to_nodeszONNXModel.input_name_to_nodes&       (((r6   c                 8   |D ]  }|j                   D cg c]R  }|j                  t        j                  j                  k(  s'|j                  t        j                  j
                  k(  r|T }}t        |      dkD  r,|D ]'  }| j                  |j                  j                         ) |j                  D ]\  }t        |j                               dk7  s || j                  vr|g| j                  |<   ?| j                  |   j                  |       ^  yc c}w )zGet input names of nodes.r   N)	attributetyper   AttributeProtoGRAPHGRAPHSlenr*   gr,   rZ   stripr)   append)r1   r   r,   attrattrs
input_names         r4   r*   z"ONNXModel._get_input_name_to_nodes+  s     	KD !NN99 3 3 9 99TYY$J]J]JdJd=d E 
 5zA~! ?D11$&&++>?"jj K
z'')*a/!)B)BBAE11*=11*=DDTJK	Ks   ADc                     | j                   S )zReturn output names of nodes.)r(   rF   s    r4   output_name_to_nodezONNXModel.output_name_to_node=  r   r6   c                    |D ]  }|j                   D cg c]R  }|j                  t        j                  j                  k(  s'|j                  t        j                  j
                  k(  r|T }}t        |      dkD  r,|D ]'  }| j                  |j                  j                         ) |j                  D ].  }t        |j                               dk7  s || j                  |<   0  yc c}w )zGet output names of nodes.r   N)r   r   r   r   r   r   r   r-   r   r,   r`   r   r(   )r1   r   r,   r   r   output_names         r4   r-   z"ONNXModel._get_output_name_to_nodeB  s     	BD !NN99 3 3 9 99TYY$J]J]JdJd=d E 
 5zA~! ?D11$&&++>?#{{ B{((*+q0=AD--k:B	Bs   AC'c                     g }| j                  |      D ]C  }| j                  |      D ]-  }|j                  |j                  k7  s|j                  |       / E |S )zGet siblings nodes.)get_parentsget_childrenr[   r   )r1   r,   siblingsr"   childs        r4   get_siblingszONNXModel.get_siblingsQ  s^    &&t, 	+F**62 +::*OOE*+	+ r6   Nc                     || j                   }g }|j                  D ]"  }||v s||   D ]  }|j                  |        $ |S )zGet children nodes.)r)   r`   r   )r1   r,   r   childrenr`   r   s         r4   r   zONNXModel.get_childrenZ  s\    &"&";";kk 	+F,,08 +EOOE*+	+ r6   c                 z    || j                   }g }|j                  D ]  }||v s|j                  ||           |S )zGet parents nodes.)r(   rZ   r   )r1   r,   r   parentsrZ   s        r4   r   zONNXModel.get_parentsf  sN    &"&";";ZZ 	;E++259:	; r6   c                     || j                   }t        |j                        |k  ry|j                  |   }||vry||   S )zGet parent node by idx.N)r(   r   rZ   )r1   r,   idxr   rZ   s        r4   
get_parentzONNXModel.get_parentq  sK    &"&";";tzz?c!

3++"5))r6   c                 j    t        |j                        }|j                  |       t        ||      }|S )zFind out node by name.)listr,   r   r   )r1   	node_namenew_nodes_listr+   graph_nodes_listr,   s         r4   find_node_by_namezONNXModel.find_node_by_name  s1    

+/I'78r6   c                     g }|j                   D ]4  }|j                  D ]#  }||j                  k(  s|j                  |       % 6 |S )z2Find all nodes with given initializer as an input.)r,   rZ   r[   r   )r1   r+   r9   r   r,   
node_inputs         r4   find_nodes_by_initializerz#ONNXModel.find_nodes_by_initializer  sN    JJ 	'D"jj '
!1!11LL&'	' r6   c                 `    |j                  d      st        j                  d| d       y fd j                  |   d   }|j                  dk(  r||j
                  d   k(  s!|j                  d	k(  r||j
                  d
   k(  ry |      \  }}|s
J d|        |s
J d|        ||fS )z*Help function to get scale and zero_point.
_quantizedzFind z) in the quantized graph is not quantized.NNc                    
j                   |    d   }
j                  j                  | d      }g d}|N|j                  |v r@|j                  d   j                  dd      j                  dd      j                  dd      }n|j                  dv r@|j                  d   j                  dd      j                  dd      j                  dd      }n2| j                  dd      j                  dd      j                  dd      }|d	z   }
j                  |      }|d
z   }
j                  |      }||| 	|j                  d         \  }}||fS )z/Search scale and zero point tensor recursively.r   N)Reshape	TransposeSqueeze	UnsqueezeMaxPoolPadSplitr   ri   _QuantizeLinear_QuantizeInput)Gather_scale_zero_point)r)   r(   r   rf   rZ   replacer`   r   )tensor_namer,   r"   direct_int8fp32_tensor_namescalescale_tensorzo	zo_tensor	_searcherr1   s            r4   r   z+ONNXModel.get_scale_zero.<locals>._searcher  s^   ,,[9!<D..22;EFeK!fnn&CLLOW\2.W.3W-r2	 ! +KKNW\2.W.3W-r2	 !  ''b9AABSUWX``aqsuv ! %x/E//6L!M1B,,R0I#y'8%.7Q.H+L)**r6   r   QLinearConvrk   QGemmzmissing scale for tensor zmissing zero point for tensor )endswithr   debugr)   rf   rZ   )r1   r   r,   r   r   r   s   `    @r4   get_scale_zerozONNXModel.get_scale_zero  s    |,LL5(QRS	+B ((03 LLM)f

2.FLLG#$**R.(@&/&7#L)E#<VH!EE<G >vhGG9**r6   c                     |rBt         j                  j                  | j                  dt	        |      j
                  dz          t        j                  | j                  |       y)zBSave model to external data, which is needed for model size > 2GB.Tz.data)rp   rq   N)r   r   convert_model_to_external_datar   r   r[   rx   )r1   output_pathuse_external_data_formats      r4   save_model_to_filezONNXModel.save_model_to_file  sO    #%%DDTDDUDZDZ]dDd E  	[1r6   c                     t        |t              rt        |t              sJ t        t        | j                              D ]$  }| j                  |   |k(  s|| j                  |<   & y)zReplace input of a node.N)r   r   ranger   rZ   )r,   old_input_namenew_input_namejs       r4   replace_node_inputzONNXModel.replace_node_input  sV     .#.:nc3RRRs4::' 	/Azz!}. .

1	/r6   c                 ^   |g }|g }t        |      dkD  rL| j                  j                  j                  D ](  }|j                  |v st
        j                  |||       * y| j                  j                  j                  D ](  }|j                  |vst
        j                  |||       * y)zReplace inputs of all nodes.Nr   )r   r2   r+   r,   rf   r
   r  )r1   r  r  white_optypeblack_optyper,   s         r4   replace_input_of_all_nodesz$ONNXModel.replace_input_of_all_nodes  s    LL|q 

((-- W<<</00~~VW 

((-- W<<|300~~VWr6   c                     t        |t              rt        |t              sJ t        t        | j                              D ]$  }| j                  |   |k(  s|| j                  |<   & y)zReplace output of a node.N)r   r   r  r   r`   )r,   old_output_namenew_output_namer  s       r4   replace_node_outputzONNXModel.replace_node_output  sX     /3/JPS4TTTs4;;'( 	1A{{1~0!0A	1r6   c                 ^   |g }|g }t        |      dkD  rL| j                  j                  j                  D ](  }|j                  |v st
        j                  |||       * y| j                  j                  j                  D ](  }|j                  |vst
        j                  |||       * y)zReplace outputs of all nodes.Nr   )r   r2   r+   r,   rf   r
   r  )r1   r  r  r	  r
  r,   s         r4   replace_output_of_all_nodesz%ONNXModel.replace_output_of_all_nodes  s    LL|q 

((-- Z<<</11$YZ 

((-- Z<<|311$YZr6   c                    g }| j                         }|D ]  }|j                  dk(  r\|j                  d   | j                  j                  j                  vr-|j                  d   | j
                  vr|j                  |       o|j                  dk(  rt        | j                  |            dk(  r| j                  |      d   j                  dk(  r{|j                  d   | j                  vr`| j                  |      d   j                  d   | j
                  vr3|j                  |       |j                  | j                  |             7d}|j                  D ]&  }|| j
                  v s|| j                         v s$d} n |j                  D ]8  }| j                  |      || j                  v s|| j                         v s6d} n |s|j                  |        | j                  |       g }| j                  j                  j                  D ]  }|j                  | j
                  vs|j                  | j                  j                  j                  vsI|j                  |       | j	                         j                  D ]E  }	|	j                  |j                  k(  s| j	                         j                  j!                  |	       G  | j#                  |       | j%                          y)	zRemove unused nodes.Constantr   QuantizeLinearr   DequantizeLinearTFN)r   rf   r`   r   r+   r)   r   r   r   rZ   r(   r   r   r   r9   r[   r   r   rb   )
r1   unused_nodesr   r,   unusedr`   rZ   ununsed_weightswgraph_inputs
             r4   remove_unused_nodeszONNXModel.remove_unused_nodes  s   

 	.D
*KKN$++*;*;*B*BBKKN$*C*CC##D) 00))$/0A5%%d+A.66:LLJJqM)B)BB%%d+A.55a8@Y@YY##D)##D$5$5d$;< "kk F!:!::f>U!& "ZZ E++E2> $";";;u

?T!&  ''-=	.> 	,'"".. 	?AvvT666166IZIZIaIa;a&&q)#'::<#5#5 ?K"''1661

**11+>?		? 	  1r6   c           	          |si }i  j                   j                  j                  D ]  }|j                  D ]>  }t	        |j                               dk7  s ||vr|g||<   +||   j                  |       @ |j                  D ]$  }t	        |j                               dk7  s ||<   &  n j                  } j                  i t               }t               } j                   j                  j                  D ]   }|j                  ||j                            "  j                   j                  j                  D ]3  }	t         fd|	j                  D              s#|j                  |	       5 |r|j                         }	t        fd|	j                  D              s|	|vr|j                  |	       G|	|	j                  <   |	j                  D ]=  }
|
|v s|j                  ||
   D cg c]  }|j                  vs||vs| c}       ? t	        |      dk(  r3t	        |      dk7  r%t        j                   |      }|j#                          |rj%                         D cg c]  }|d   	 }}t	        t'        |D 	ch c]  }	|	j                   c}	            t	        t'         j                   j                  j                  D 	ch c]  }	|	j                   c}	            k(  sJ  j                   j                  j)                  d        j                   j                  j                  j                  |       yc c}w c c}w c c}	w c c}	w )zTopological sort the model.r   c              3   N   K   | ]  }|vxr |j                         v  y wN)rZ   ).0r]   r   r1   s     r4   	<genexpr>z-ONNXModel.topological_sort.<locals>.<genexpr>D  s*     [a1//IATZZ\4II[s   "%c              3   J   K   | ]  }|v s|   j                   v   y wr  )r[   )r  r]   	all_nodesr   s     r4   r   z-ONNXModel.topological_sort.<locals>.<genexpr>I  s+     mATUYlTl*1-22i?ms   	##r   r,   N)r2   r+   r,   rZ   r   r   r   r`   r)   r(   r   r   r[   allpopleftcopydeepcopyclearitemsr   
ClearField)r1   enable_subgraphr   r,   r   r   qwaitinpnoutr]   r   r"  r   s   `            @@r4   topological_sortzONNXModel.topological_sort*  s    "$"$

((-- 	@"&** IJ:++-.!3%-@@?Cf/
;/
;BB4HI $(;; @K;,,./14;?+K8@	@ #'";";"&";";	Gw::##)) 	4CHH(23	4!!&& 	A[STSZSZ[[	 		AmmmD=KKN !Iaffxx n--HH)<S)AlAQVVS\E\abjkakalmn 1v{s4yA~MM$'

   )01!1114/A/01StzzO_O_OdOd>e!qvv>e9f5gggg

##F+

$$U+ m 2/>es$   3M
M
M
/MMMc                    |g }t               }|D ]`  }t        |t              r|j                  |       %t        |t        j
                        r|j                  |j                         [J d        g }|D ]`  }t        |t              r|j                  |       %t        |t        j
                        r|j                  |j                         [J d        |r|j                         }||v r||vr|j                  |       n.t        |t        | j                  j                  j                              }| j                  |      D ]  }|j                  |j                          |r|S )z4Get nodes chain with given start node and stop node.zM'get_nodes_chain' function only support list[string]or list[NodeProto] params)r   r   r   r   r   	NodeProtor[   r$  r   r   r2   r+   r,   r   )	r1   startstopresult_chain
start_noder,   	stop_noder   r"   s	            r4   get_nodes_chainzONNXModel.get_nodes_chainZ  sT   LW
 	nD$$!!$'D$..1!!$)),mmmu	n 	 	nD$$  &D$..1  +mmmu	n "**,II%,##I.	4

0@0@0E0E+FGD**40 /!!&++./  r6   c                    g }| j                   j                  j                  D ]#  }d\  }}|j                  dk(  r0|}| j	                  |g dg d      | j	                  |g dg d      g}|j                  dk(  r|}| j	                  |g dg d      | j	                  |g dg d	      | j	                  |g d
g d| j
                  g       | j	                  |g dg d      | j	                  |g dg d      | j	                  |g dg d      | j	                  |g dg d      g}|st        |      s|j                  |       & |S )z,Find split node for layer wise quantization.r   SkipLayerNormalizationMatMulr   r   r   r<  Nr   r   r   r   Addr<  r   r   r<  r   r   r   r   r   r?  r   Nr   r   r   r   Nr   r   r   r   Gemmr   r   r   r<  Nr   r   r   r   r   r   return_indicer?  r<  r   r   r   r<  r   Nr   r   r   r   r   Nr   r   r   r   )r<  Mulr<  rK  Divr?  )Nr   Nr   Nr   )r<  rK  r<  SimplifiedLayerNormalizationr?  )Nr   Nr   r   )r   r+   r,   rf   match_parent_pathr   anyr   )r1   start_nodesr,   r6  qkv_nodes_lists        r4   +find_split_node_for_layer_wise_quantizationz5ONNXModel.find_split_node_for_layer_wise_quantization  s    KK%%** >	+D)3&J||77!
**"O*
 **"K'" ||u$!
 **"K*
 **"$WYk **"X-,0,D,D&( +  **"V-
 **"V-
 **"H3
 **"Z-E'"P ~&z*}>	+~ r6   c                 x   g }| j                   j                  j                  D ]  }|j                  dk(  rj	                  |j
                  g       0d\  }}|j                  dk(  r0|}| j                  |g dg d      | j                  |g dg d      g}|j                  dk(  r|}| j                  |g dg d	      | j                  |g dg d
      | j                  |g dg d| j                  g       | j                  |g dg d      | j                  |g dg d      g}|st        |      s|D cg c]  }||	 c}d   }g }|j                  D ]8  }|| j                  vr||d   j                  d   k(  r(|j	                  |       : t        |      dk7  r|d   }	| j                  }
|
|	   }|D cg c]  }|j                   }}|j                  d      dk(  sj	                  |D cg c]  }|j                  dk(  s|j
                    c}       |r |S  S c c}w c c}w c c}w )zFind qkv MatMul in Attention.

        Args:
            find_all (bool, optional): find all qkv MatMul. Defaults to False

        Returns:
            qkv (list): qkv MatMul list
        	Attentionr   r:  r;  r=  r>  r@  r?  rA  rB  rC  rE  rF  rH  rI  rJ  rk   r   r   r<     )r   r+   r,   rf   r   r[   rN  r   rO  rZ   r`   r   r   count)r1   find_allqkvr,   r6  rQ  	qkv_nodesother_inputsrZ   
root_inputr   r   r   children_typess                 r4   find_qkv_in_attentionzONNXModel.find_qkv_in_attention  sj    KK%%** H	D||{*

DII;')3&J||77!
**"O*
 **"K'" ||u$!
 **"K*
 **"$WYk **"X-,0,D,D&( +  **"V-
 **"V-1"< ~&(6J#/J2NIL#)) + 8 88IaL//22##E*+ < A%%aJ"&":":*:6H9ABemmBNB##H-2

HZ5QY@YEJJZ[
SH	R 
% K CZs   :H-H-?H29H7
H7
c                 >   g }t        t        |            D ]  }|t        |      dz
  k7  r1||dz      }|dz
  dk\  s%|j                  ||dz
     ||dz
     g       E||   }||z   dz
  t        |      k  s_|j                  |||z   dz
     |||z   dz
     g        |S )a  Find MatMul in FFN.

        Args:
            attention_index (list): index of Attention
            attention_matmul_list (list): list of Attention and MatMul nodes
            block_len (int): block length

        Returns:
            list: list of MatMul in FFN
        r      r   )r  r   r   )r1   attention_indexattention_matmul_list	block_len
ffn_matmulr   indexs          r4   find_ffn_matmulzONNXModel.find_ffn_matmul"  s     
_-. 
	Cc/*Q..'a019>%%'<UQY'GI^_dgh_hIi&jk',9$q(3/D+EE%%.uy/@1/DEG\]ben]nqr]rGst
	 r6   c                    ddl m} ddlm} t	        ||      r || j
                  | j                        \  }}}| j                  |       | j                  |       | j                  |       | j                          | j                          | j                          | j                  |       yt        j                  d       t!        d       y)zExport Qlinear to QDQ model.r   )ONNXQlinear2QDQConfig)onnx_qlinear_to_qdqzGUnsupported config for export, only ONNXQlinear2QDQConfig is supported!N)neural_compressor.configrg  neural_compressor.utils.exportrh  r   r   r)   r   r   r   rb   r  r0  ry   r   r   exit)r1   	save_pathconfrg  rh  r   r   initss           r4   exportzONNXModel.export;  s    BFd12-@dNgNg-h*I|UNN9%l+!!%(KKM$$&!!#IIi NNdeGr6   c                    g }|D ]K  }|| j                         vst        j                  j                         }||_        |j                  |       M | j                  j                  j                   j                  |       y)zAdd the tensors to the model outputs to gets their values.

        Args:
            tensor_names: The names of tensors to be dumped.
        N)	r`   r   r   ValueInfoProtor[   r   r   r+   r   )r1   tensor_namesadded_outputsr   added_tensors        r4   add_tensors_to_outputsz ONNXModel.add_tensors_to_outputsM  sn     " 	3FT[[]*#{{99;$*!$$\2		3
 	  ''6r6   c                 F   g }|D ]d  }|| j                         v s|j                  | j                  j                  j                   | j                         j	                  |                f |D ]1  }| j                  j                  j                   j                  |       3 y)zRemove the tensors from the model outputs.

        Args:
            tensor_names: The names of tensors to be removed.
        N)r`   r   r   r+   rd  r   )r1   rr  removed_outputsr   r`   s        r4   remove_tensors_from_outputsz%ONNXModel.remove_tensors_from_outputs[  s     " 	^F&&&t{{'8'8'?'?@S@STZ@['\]	^ & 	4FKK$$++F3	4r6   c                     |g }t        |j                        D ])  \  }}||v s||   }|j                  |k(  s ||vs%||fc S  y)a  Find parent node based on constraints on op_type.

        Args:
            node (str): current node name.
            parent_op_type (str): constraint of parent node op_type.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            exclude (list): list of nodes that are excluded (not allowed to match as parent).

        Returns:
            parent: The matched parent node. None if not found.
            index: The input index of matched parent node. None if not found.
        r   )	enumeraterZ   rf   )r1   r,   parent_op_typer   excluder]   rZ   r"   s           r4   match_first_parentzONNXModel.match_first_parenth  s_     ?G!$**- 	%HAu++,U3>>^3g8M!19$		%
 r6   c                 $   |J ||dk\  sJ |g }|| j                   }|,| j                  ||||      \  }}||j                  |       |S |t        |j                        k\  ry| j                  |||      }||j                  |k(  r||vr|S y)a|  Find parent node based on constraints on op_type and index.

        Args:
            node (str): current node name.
            parent_op_type (str): constraint of parent node op_type.
            input_index (int or None): only check the parent given input index of current node.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            exclude (list): list of nodes that are excluded (not allowed to match as parent).
            return_indice (list): a list to append the input index when input_index is None.

        Returns:
            parent: The matched parent node.
        Nr   )r(   r}  r   r   rZ   r   rf   )	r1   r,   r{  input_indexr   r|  rG  r"   rd  s	            r4   match_parentzONNXModel.match_parent~  s    , "kQ&666?G&"&";"; 33D.J]_fgMFE($$U+M#djj/){4GH&..N"BvU\G\Mr6   c           	          t        |      t        |      k(  sJ || j                  }|}g }t        |      D ]6  \  }}	| j                  ||	||   |g |      }
|
 y|j	                  |
       |
}8 |S )a  Find a sequence of input edges based on constraints on parent op_type and index.

        Args:
            node (str): current node name.
            parent_op_types (str): constraint of parent node op_type of each input edge.
            parent_input_index (list): constraint of input index of each input edge.
                                       None means no constraint.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            return_indice (list): a list to append the input index when there is
                                  no constraint on input index of an edge.

        Returns:
            parents: a list of matched parent node.
        N)r|  rG  )r   r(   rz  r  r   )r1   r,   parent_op_typesparent_input_indexr   rG  current_nodematched_parentsr]   rf   matched_parents              r4   rN  zONNXModel.match_parent_path  s    , %&#o*>>>>&"&";";#O4 	*JAw!.."1%#+ / N %"">2)L	* r6   c                 n    | j                   j                  j                  D ]  }d|j                  v s y y)z~Check the model is smooth quantized or not.

        Returns:
            bool: the model is smooth quantized or not.
        _smooth_scaleTF)r2   r+   r9   r[   )r1   rB   s     r4   is_smoothquant_modelzONNXModel.is_smoothquant_model  s7     JJ$$00 	D$))+	 r6   c                 &    | j                         }|S )z-Find split nodes for layer-wise quantization.)rR  )r1   split_nodess     r4   find_split_nodeszONNXModel.find_split_nodes  s    FFHr6   c                 &
   t        j                         }|j                  | j                         |j                  j                  d       t        j                         }|j                  | j                         |j                  j                  d       d}d}| j                  j                  j                  D ]u  }	|dk(  r&|j                  j                  j                  |	       n*|dk(  r%|j                  j                  j                  |	       |	j                  |k(  sh|	j                  }d}w t        |      dk(  sJ d| dt        |       d       |d   }
|rH	 dd	lm}  || j                  d
t        j                  j                  | j                               | _        | j)                  |
      \  }}t         j*                  j-                  |
||      }t/        |d
      }t/        |d
      }|j1                          |j1                          |j2                  j                  j                  j                  |       |j2                  j                  j4                  j                  |       g }g }|j6                  D ]  }||j8                  v s| j)                  |      \  }}t         j*                  j-                  |||      }||j2                  j                  j                  vr|j                  |       ||j2                  j                  j4                  vs|j                  |        |D ]1  }|j2                  j                  j                  j                  |       3 |D ]1  }|j2                  j                  j4                  j                  |       3 |j;                          |j;                          |j=                          |j=                          t        j                  j                  |      }|j?                  |       t        j                  jA                  |d      }||_!        |jE                  |       |jG                          t$        jI                  d| d       |rv|j?                  |       t        j                  jA                  |d      }||_!        |jE                  |       |jG                          t$        jI                  d| d       ||fS ||fS # t"        $ r}t$        j'                  d       |d}~ww xY w)a[  Split model into two parts at a given node.

        Args:
            split_node_name (str): name of the node where the model is split at>
            path_of_model_to_split (str): path of model to be split.
            shape_infer (bool): do shape inference. Default is True.
            save_both_split_models (bool): whether to save the two split models.
                False means only save the first split model.
                True means save both the two split models.
                Default id True.

        Returns:
            tuple: the first split model, the second split model
        r,   Nr   r_  zJOnly support split at node with 1 output tensor, while current split node z has z output tensorsr   )infer_shapesT)
auto_mergebase_dirzShape infer fails for layer-wise quantization. We would recommend checking the graph optimization level of your model and setting it to 'DISABLE_ALL' or 'ENABLE_BASIC', as this may help avoid this error.)r   zsplit_model_part_1.onnxzsave split model part 1 to z for layer wise quantizationzsplit_model_part_2.onnxzsave split model part 2 to )%r   
ModelProtoCopyFromr   r+   r)  r,   r   r[   r`   r   'neural_compressor.adaptor.ox_utils.utilr  r   r   r   r   r@   r   error%_get_output_type_shape_by_tensor_namer   make_tensor_value_infor
   _remove_unused_input_outputr2   rZ   r   r   remove_unused_initrb    load_model_initializer_by_tensorjoinrJ   _save_split_modelr   r   )r1   split_node_namepath_of_model_to_splitshape_infersave_both_split_modelssplit_model_part_1split_model_part_2split_node_outputpart_idxr,   split_tensor_namer  rD   split_tensor_typesplit_tensor_shapesplit_tensorinsert_output_for_model_1insert_input_for_model_2r`   output_typeoutput_shapeoutput_tensorrZ   dir_of_model_to_splitsplit_model_part_1_pathsplit_model_part_2_paths                             r4   split_model_with_nodezONNXModel.split_model_with_node  s   * "__.##DKK0  ++F3!__.##DKK0  ++F3 KK%%** 	D1}"((--44T:Q"((--44T:yyO+$(KK!	 $%* 	
XYhXiinor  tE  pF  oG  GV  W	
* .a0 P*4;;4RTRYRYRaRabfbrbrRst 150Z0Z[l0m--{{99:KM^`rs&'9$O&'9$O 	668668  &&--44\B  &&,,33LA$&!#% (<< 	CF+???,0,V,VW],^)\ $ B B6;Xd e (:(@(@(F(F(M(MM-44]C (:(@(@(F(F(L(LL,33MB	C 0 	AF$$**1188@	A . 	?E$$**0077>	? 	--/--/!!#!!# "0F G;;<QR"$'',,/DF_"`(?%,,-DE//123J2KKghi!??@UV&(ggll3HJc&d#,C)001HI335LL67N6OOklm%'999%'999E  9 s   AS+ +	T4TTc           	          t         j                  j                  |dz         rt        j                  |dz          t	        j
                  | j                  |dd|j                  d      d   dz   dd       y)	zSave split model as external data for layer wise quantization.

        Args:
            save_path (str): the path to save the split model
        rl   Trj   rk   rm   Frn   N)r   r   r!   r   r   rx   r   rv   )r1   rl  s     r4   r  zONNXModel._save_split_model`  sb     77>>)g-.IIi')*KK"&$(__S)"-7#	
r6   c                    t         j                  j                  }d}| j                  j                  j
                  D ]  }|j                  |k(  s|j                  j                  j                  }|j                  j                  j                  j                  D cg c]!  }|j                  d      r|j                  nd# }} ||fS  ||fS c c}w )zGet output type and shape with a tensor name.

        Args:
            tensor_name (str): name of a tensor

        Returns:
            tuple: output type and shape
        N	dim_valuerk   )r   r;   FLOATr   r+   
value_infor[   r   tensor_type	elem_typeshapedimr:   r  )r1   r   r  r  r`   r  s         r4   r  z/ONNXModel._get_output_type_shape_by_tensor_namer  s     $$**	kk''22 	F{{k)"KK33==	RXR]R]RiRiRoRoRsRsKNS\\+%>CMMBF  %	 %	s   &Cc                     g }g }| j                   j                  j                  D ],  }|j                  | j                  vs|j                  |       . | j                   j                  j                  D ],  }|j                  | j                  vs|j                  |       . |D ]1  }| j                   j                  j                  j                  |       3 |D ]1  }| j                   j                  j                  j                  |       3 y)z-Remove unused input & output for split model.N)	r   r+   r`   r[   r   r   rZ   r   r   )r1   remove_outputsremove_inputsr`   rZ   s        r4   r  z%ONNXModel._remove_unused_input_output  s    kk''.. 	.F{{$":"::%%f-	. [[&&,, 	,Ezz!9!99$$U+	, % 	4FKK$$++F3	4" 	2EKK##**51	2r6   c                     g }| j                   j                  j                  D ],  }|j                  | j                  vs|j                  |       . | j                  |       y)zRemove unused init.N)r   r+   r9   r[   r   r   r   )r1   remov_initsrB   s      r4   r  zONNXModel.remove_unused_init  sV    KK%%11 	)Dyy 8 88""4(	) 	  -r6   c                 X   |)t         j                  j                  | j                        }| j                  j
                  j                  D ]\  }|j                  d      s|j                  t        j                  j                  k(  s=t        j                  j                  ||       ^ y)zLoad model initializer by tensor.

        Args:
            data_path (str, optional): the directory of saved initializer. Defaults to None.
        Nr8   )r   r   r   r   r   r+   r9   r:   r8   r   r;   r<   r   load_external_data_for_tensor)r1   	data_pathrB   s      r4   r  z*ONNXModel.load_model_initializer_by_tensor  s~     (8(89IKK%%11 	YD}}_-$2D2DHXHXHaHa2a))GGiX	Yr6   c                    |rt         j                  j                  t         j                  j                  t         j                  j	                  | j
                        |            rZt        j                  t         j                  j                  t         j                  j	                  | j
                        |             | j                          t        j                  j                  | j                  |       t        j                  j                  | j                  t         j                  j	                  | j
                               y)a}  Write external data of merged quantized model to new location to save memory.

        Args:
            external_data_location (str, optional): external data location of merged quantized model.
                                                    Defaults to "external.data".
            overwrite (bool, optional): if True, remove existed externa data. Defaults to False.
        )rq   )filepathN)r   r   r!   r  r   r   r   r  r   r   r   r   write_external_data_tensors)r1   external_data_location	overwrites      r4   #write_external_data_to_new_locationz-ONNXModel.write_external_data_to_new_location  s     RWW__TEUEU5VXn(opIIbggll277??43C3C#DF\]^--/!!@@Wm@n!!==dkkTVT[T[TcTcdhdtdtTu=vr6   c                    |j                          | j                  t        |j                                      | j	                  t        |j                                      | j                          |j                         j                  D ]N  }|j                  | j                         vs | j                  j                  j                  j                  |       P g }| j                  j                  j                  D ]0  }|j                  |j                         v s |j                  |       2 |D ]1  }| j                  j                  j                  j                  |       3 |j                         j                  D ]  }|j                  | j                         vs |j                  | j                         vs=|j                  | j                  vsV| j                  j                  j                  j                  |        y)z'Merge two split model into final model.N)r  r   r   r   r   r9   rb   r+   r`   r[   r   r   rZ   r   r   )r1   to_merge_modelr`   remove_outputrZ   s        r4   merge_split_modelszONNXModel.merge_split_models  s   ::<tN00234d>#=#=#?@A %**,33 	8F{{$++-/!!((//7	8
 kk''.. 	-F{{n2244$$V,	- $ 	4FKK$$++F3	4 $))+11 	6E

$**,.JJdkkm3JJd&>&>>!!''..u5	6r6   c                 t   i }g }| j                   j                  j                  D ]"  }|||j                  <   |j	                  |       $ |D ]1  }| j                   j                  j                  j                  |       3 |D ]4  }| j                   j                  j                  j	                  ||          6 y)z:Re-org output of merged model for layer-wise quantization.N)r   r+   r`   r[   r   r   )r1   origin_outputoutputs
tmp_remover`   out_names         r4   re_org_outputzONNXModel.re_org_output  s    
kk''.. 	&F#)GFKK f%	& ! 	4FKK$$++F3	4 & 	?HKK$$++GH,=>	?r6   )Fr  r   )NNNN)TT)zexternal.dataF)M__name__
__module____qualname____doc__r5   r   propertyrG   rJ   setterrP   rS   rV   r2   rZ   r`   rb   rd   r/   ry   r   r9   r+   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r*   r   r-   r   r   r   r   r   r   r   r  staticmethodr  r  r  r  r  r0  r8  rR  r]  re  ro  ru  rx  r}  r  rN  r  r  r  r  r  r  r  r  r  r  r  rO   r6   r4   r
   r
   &   st   B%. $ $           __" "     \\> >9:>    >
J0&-!&(0
#
.4;
)
	9
1
) ) )K$ ) )B
	*2+h2 / /W 1 1Z/b.,`%NJXSj2$744  *b !,\	 aev:p
$ (2".
Yw6:?r6   r
   )r  r%  loggingr   r>   collectionsr   pathlibr   r   onnx.external_data_helperutilr   r   	getLoggerr   r
   rO   r6   r4   <module>r     sD   &    	 
      0			.	/
}? }?r6   