
    piF                         d dl Z d dlZd dlZd dlZd dlmZ ddlmZm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddl m!Z!  G d	 d
e      Z"y)    N)onnx_pb   )BaseQuantizerQuantizationParams)
TensorData)	ONNXModel)TENSOR_NAME_QUANT_SUFFIXQuantizationModeQuantizedValueQuantizedValueType__producer____version__add_infer_metadataattribute_to_kwargcompute_scale_zpcompute_scale_zp_float8find_by_nameget_qmin_qmax_for_qTypeget_qrange_for_qType	ms_domainquantize_onnx_initializer&save_and_reload_model_with_shape_infertensor_proto_to_array)CreateOpQuantizerc                      e Zd Z	 d&dZd Zd Zd Zd Zd Zd Z	d	 Z
d'd
Zd Zd Zd Zd Zd(dZ	 d)dZd Z	 d&dZdej*                  dej*                  dedej0                  dedeeej*                  dz  f   fdZdedej*                  ddfdZd*dZd Zd'dZ	 	 	 	 d+dZ 	 	 	 	 	 d,d Z!d-d!Z"	 	 d.d"Z#d# Z$d$ Z%d% Z&y)/ONNXQuantizerNc                 <   t        j                  | |||||||	|
||       |s| j                  j                          t	        | j                  j                        }|j
                  j                  D ci c]  }|j                  | c}| _        | j                  j                  |j
                  j                  D ci c]  }|j                  | c}       | j                  j                  |j
                  j                  D ci c]  }|j                  | c}       t        |      | _        || _        || _        | j                  dkD  | _        d| j"                  v xr | j"                  d   | _        g | _        d| _        i | _        | j*                  j                  |j
                  j                  D ci c]  }|j                  d c}       | j*                  j                  |j
                  j                  D ci c]  }|j                  d c}       | j                  j                  j
                  j,                  D ];  }| j*                  j                  t.        j1                  |j                  d             = | j                  t2        vrt5        d| j                         | j7                         | _        d| _        d| _        d| _        d	| _         i | _!        | j                  jE                         | _#        y c c}w c c}w c c}w c c}w c c}w )
N
   MatMulConstBOnly/r   zunsupported quantization mode fixed_quantization_range_uint8fixed_quantization_range_int8
fixed_zerofixed_zero_zp)$r   __init__modelreplace_gemm_with_matmulr   graph
value_infonamevalue_infosupdateoutputinputr   modestaticopset_versionfuse_dynamic_quantextra_optionsq_matmul_const_b_only	new_nodesgraph_scopetensor_namesnodedictfromkeysr
   
ValueErrorcalculate_quantization_paramsquantization_paramsfixed_qrange_uint8_namefixed_qrange_int8_namefixed_zero_namefixed_zero_zp_namequantized_value_mapget_non_initializer_inputsgenerated_value_names)selfr&   per_channelreduce_ranger/   r0   weight_qTypeactivation_qTypetensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizer3   viotitr8   s                    i/opt/services/ai/voice_agent/venv/lib/python3.12/site-packages/onnxruntime/quantization/onnx_quantizer.pyr%   zONNXQuantizer.__init__'   s    	 	
 JJ//1:4::;K;KLE6;kk6L6LMMD##5;;;M;M$NRRWWb[$NO##5;;;L;L$MRRWWb[$MN"5)DJ	"&"4"4r"9%74;M;M%M%xRVRdRdewRx"  u{{7I7I!J"''1*!JK  u{{7H7H!I"''1*!IJJJ$$**// 	DD$$T]]4;;%BC	D 99,,=dii[IJJ#'#E#E#G  (H$&E#+"1 $&  &*ZZ%J%J%L"K  N$N$M "K!Is   4L=L
L8L Lc                 :   t         j                  j                  |d| j                  j                  j                        }t        |       t        || j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                   | j"                        }| |_        | j&                   | d|_        |j)                          |j                  j                  j*                  S )z
        generate submodel for the subgraph, so that we re-utilize current quantization implementation.
        quantize the submodel
        update subgraph and set it back to node
        onnx-quantizer)producer_nameopset_importsr    )onnxhelper
make_modelr&   opset_importr   r   rF   rG   r/   r0   rH   rI   rJ   rK   rL   rM   r3   parentr6   quantize_modelr(   )rE   subgraph	graph_keywarped_modelsub_quantizers        rQ   quantize_subgraphzONNXQuantizer.quantize_subgraphp   s     {{--*****77 . 

 	<(%IIKK!!""!!%%
  $'+'7'7&81$E!$$&""((...    c                 6   |j                   D cg c]R  }|j                  t        j                  j                  k(  s'|j                  t        j                  j
                  k(  r|T }}t        |      dk(  r|S |j                  r|j                  n#|j                   dt        | j                         }i }|j                   D ]  }|j                  t        j                  j                  k(  r8|j                  | j                  |j                  | d|j                         i}n|j                  t        j                  j
                  k(  r_g }|j                  D ]?  }|j                  | j                  || d|j                   dt        |             g       A |j                  |i}nt        |      }|j                  |        t        j                   j"                  |j                  |j$                  |j&                  fd|j                  i|S c c}w )z|
        Check subgraph, if any, quantize it and replace it.
        return new_nodes added for quantizing subgraph
        r   _node_count_:r*   )	attributetyperV   AttributeProtoGRAPHGRAPHSlenr*   op_typer5   r`   ggraphsextendr   r,   rW   	make_noder.   r-   )	rE   r8   attrgraph_attrs	node_namekwargskvvaluer\   s	            rQ   quantize_node_with_sub_graphz*ONNXQuantizer.quantize_node_with_sub_graph   s    
yyD//555dFYFYF`F`9` 
 

 {q K!%DII4<<.SQUQ_Q_M`La0b	NN 	DyyD//555ii!7!79+Qtyyk@Z![\d11888 $ HLL 22 (#,+Qtyyk3u:, G ii''-MM"#	$ {{$$T\\4::t{{eQUQZQZe^dee7
s   AHc                 V    t        d | j                  j                         D              S )zQ
        Detect if model already has QuantizeLinear or DequantizeLinear.
        c              3   \   K   | ]$  }|j                   d k(  xs |j                   dk(   & yw)QuantizeLinearDequantizeLinearN)rk   ).0r8   s     rQ   	<genexpr>z.ONNXQuantizer.has_QDQ_nodes.<locals>.<genexpr>   s1      
W[DLL,,R@R0RR
s   *,)anyr&   nodes)rE   s    rQ   has_QDQ_nodeszONNXQuantizer.has_QDQ_nodes   s-      
_c_i_i_o_o_q
 
 	
ra   c                     t        || j                  j                               y| j                  | j                  j	                  |      S y)NTF)r   r&   initializerrZ   find_initializer_in_path)rE   initializer_names     rQ   r   z&ONNXQuantizer.find_initializer_in_path   sC    ($***@*@*BCO;;";;778HIIra   c                     | j                   j                  |       |D ].  }|j                  D ]  }| j                  j	                  |        0 y N)r5   rn   r-   rD   add)rE   r~   r8   output_names       rQ   add_new_nodeszONNXQuantizer.add_new_nodes   sJ    e$ 	<D#{{ <**..{;<	<ra   c                    | j                         rt        j                  d       | j                  j	                         D ]  }| j
                  r| j                  |      }t        | j                        }t        | |      }|j                          t        |t        | j                              D ];  }| j                  |   j                  D ]  }| j                  j                  |        =  | j                          | j                  j!                         j#                  d       | j                  j!                         j$                  j'                  | j                         | j(                  B| j                  j+                         \  }}t        |      dkD  rt-        dt/        |      z         t0        | j                  j                  _        t4        | j                  j                  _        | j                  j                  j8                  D cg c]  }|j:                  t<        k(  s| }	}|	sk| j                  D cg c]  }|j:                  dk(  s| }
}|
r@| j                  j                  j8                  j                         }d|_        t<        |_        | j                  j                  S c c}w c c}w )NzPlease check if the model is already quantized. Note you don't need to quantize a QAT model. OnnxRuntime support to run QAT model directly.r8   r   z0Invalid model with unknown initializers/tensors.zcom.microsoftr   ) r   loggingwarningr&   r~   enable_subgraph_quantizationrv   rj   r5   r   quantizeranger-   rD   r   _dequantize_outputsr(   
ClearFieldr8   rn   rZ   clean_initializersRuntimeErrorstrr   rT   r   producer_versionrY   domainr   version)rE   r8   number_of_existing_new_nodesop_quantizerir   _initializers_not_foundopsetms_opsetms_nodess              rQ   r[   zONNXQuantizer.quantize_model   s0   OOn
 JJ$$& 
	@D0088>+.t~~+>(,T48L!!#7T^^9LM @#'>>!#4#;#; @K..22;?@@
	@ 	  " 	

%%f-

&&t~~6 ;;(,

(E(E(G%A%)*Q."#UX[\rXs#stt)5

&,7

)'+zz'7'7'D'DbeXaHaEbb)-Z4;;/;YZHZ

((5599; !(zz cZs   J6-J6J;J;c                     d| j                   v r3t        j                  d|| j                   d          | j                   d   S t        d|d      )NDefaultTensorTypezDget_tensor_type returns DefaultTensorType for tensor name %r, use %dz)Unable to find data type for weight_name=a7  . shape_inference failed to return a type probably this node is from a different domain or using an input produced by such an operator. This may happen if you quantize a model already quantized. You may use extra_options `DefaultTensorType` to indicate the default weight type, usually `onnx.TensorProto.FLOAT`.)r3   r   infor   rE   tensor_names     rQ   _get_default_tensor_typez&ONNXQuantizer._get_default_tensor_type   sf    $"4"44LLV""#67
 %%&9::7 GI J
 	
ra   c                    t        || j                  j                               }||j                  S || j                  v r| j                  |   }|j
                  j                  d      rV|r4|j
                  j                  j                  dk(  r| j                  |      S |j
                  j                  j                  S | j                  r| j                  |r| j                  |      S y | j                  j                  |      }||S | j                  r+| j                  r| j                  j                  |      }||S |r| j                  |      S y )Ntensor_typer   )r   r&   r   	data_typer+   rf   HasFieldr   	elem_typer   r   rZ   is_valid_quantize_weightget_tensor_type)rE   r   	mandatoryweightrN   otyperess          rQ   r   zONNXQuantizer.get_tensor_type  s*   k4::+A+A+CD###$***!!+.Bww.!4!4!>!>!!C88EEww**44411t{{7J44[AA44[AL,,++--k:C
00==ra   c                 H   | j                  |      r| j                  |      S || j                  v r| j                  |   }|j                  j	                  d      rU|j                  j
                  j                  t        j                  j                  t        j                  j                  fv ryt        j                  d|d|j                   d       y| j                  r'| j                  r| j                  j                  |      S t        j                  d|d       y)	Nr   Tz<Inference failed or unsupported type to quantize for tensor z
, type is .Fz%Failed to infer data type of tensor: zS. Please add data type info for this tensor if your model has customized operators.)is_input_a_initializerr   r+   rf   r   r   r   
onnx_protoTensorProtoFLOATFLOAT16r   r   r   rZ   is_float_tensor)rE   r   rN   s      rQ   r   zONNXQuantizer.is_float_tensor  s   &&{300==$***!!+.Bww.2773F3F3P3P&&,,&&..U 4 OON{o]ghjhohogppqr ,,;;..{;;3K? C6 7	
 ra   c                     |t         j                  j                  k(  r| j                  |||      S |t         j                  j                  k(  r| j                  |||      S t        d| d      )a  
        Create nodes for dynamic quantization of input and add them to nodes_list.
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter qType: type to quantize to.
            parameter initial_type: type to quantize from
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        zUnexpected value for qType=r   )r   r   INT8+_get_dynamic_input_quantization_params_int8UINT8,_get_dynamic_input_quantization_params_uint8r;   )rE   
input_name
nodes_listqTypeinitial_types        rQ   &_get_dynamic_input_quantization_paramsz4ONNXQuantizer._get_dynamic_input_quantization_params6  so     J**///CCJPZ\hiiJ**000DDZQ[]ijj6ugQ?@@ra   c                    t         j                  j                  }|dz   }|dz   }t        j                  j                  d|g|dz   g|d      }|j                  |       |dz   }t        j                  j                  d|g|dz   g|d      }	|j                  |	       |d	z   }
t        j                  j                  d
|j                  d   g|
dz   g|
      }|j                  |       |d	z   }t        j                  j                  d
|	j                  d   g|dz   g|      }|j                  |       |dz   }t        j                  j                  d|j                  d   |j                  d   g|dz   g|      }|j                  |       t        j                  j                  | j                  |g t        |      dz  g      }| j                  j                  |       |dz   }t        j                  j                  d|j                  d   | j                  g|g|      }|j                  |       t        j                  j                  | j                  |g dg      }| j                  j                  |       || j                  g g fS )az  
        Create nodes for dynamic quantization of input to int8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter initial_type: initial weight type (FLOAT or FLOAT16)
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        _scale
_ReduceMin	ReduceMin:0r   keepdims
_ReduceMax	ReduceMax_AbsAbs_Abs_MaxMax       @	scale_DivDiv)r   r   r   rV   rW   ro   appendr-   make_tensorr?   r   r&   add_initializerrA   )rE   r   r   r   r   input_scale_namereduce_min_namereduce_min_nodereduce_max_namereduce_max_nodereduce_min_abs_namereduce_min_abs_nodereduce_max_abs_namereduce_max_abs_nodeabs_max_nameabs_max_nodeinitializer_divscale_div_namescale_div_nodeinitializer_zps                       rQ   r   z9ONNXQuantizer._get_dynamic_input_quantization_params_int8E  s    &&++ &0$|3++//Lt#$ 0 
 	/*$|3++//Lt#$ 0 
 	/* .6"kk33##A&' 4'(	
 	-.-6"kk33##A&' 4'(	
 	-.!J.{{,, ''*,?,F,Fq,IJD !	
 	,'++11''!%(3./	
 	

""?3#k1..  #T%@%@A	
 	.) 001H1H%QSVWUXY

"">2!8!8"b@@ra   c                 .   t         j                  j                  }|dz   }|dz   }|dz   }t        j                  j                  d|g|dz   g|d      }|j                  |       |dz   }	t        j                  j                  d	|g|	dz   g|	d      }
|j                  |
       t        j                  j                  | j                  |g t        |      g      }| j                  j                  |       t        j                  j                  | j                  |g d
g      }| j                  j                  |       |dz   }t        j                  j                  d|
j                  d   |j                  d   g|dz   g|      }|j                  |       |dz   }t        j                  j                  d|j                  d   | j                  g|g|      }|j                  |       |dz   }t        j                  j                  d| j                  |j                  d   g|dz   g|      }|j                  |       |dz   }t        j                  j                  d|j                  d   |g|dz   g|      }|j                  |       |dz   }t        j                  j                  d|j                  |dz   g|      }|j                  |       |dz   }t        j                  j                  d|j                  |g||      }|j                  |       ||g g fS )a{  
        Create nodes for dynamic quantization of input to uint8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter initial_type: initial weight type (FLAOT or FLOAT16)
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        r   _zero_pointr   r   r   r   r   r   r           
_scale_SubSub
_scale_Divr   _zero_point_Sub_zero_point_Div_zero_point_FloorFloor_zero_point_CastCast)to)r   r   r   rV   rW   ro   r   r   r>   r   r&   r   r@   r-   )rE   r   r   r   r   r   input_zp_namer   r   r   r   initializer_qrangeinitializer_qvaluescale_sub_namescale_sub_noder   r   zp_sub_namezp_sub_nodezp_div_namezp_div_nodezp_floor_namezp_floor_nodezp_cast_namezp_cast_nodes                            rQ   r   z:ONNXQuantizer._get_dynamic_input_quantization_params_uint8  s6    &&,,%0"]2$|3++//Lt#$ 0 
 	/*$|3++//Lt#$ 0 
 	/* "[[44((!%()	
 	

""#56![[44T5I5I<Y[^a]bc

""#56 $l2..##A&(>(>q(ABd"#	
 	.)#l2..""1%t'C'CD	
 	.) !#44kk++!!?#9#9!#<=4 	
 	+& #44kk++"$454 	
 	+&"%88--g{7I7IM\`L`Kacpq-(!$66{{,,V]5I5IM?\hmr,s,'B66ra   c                 H   | j                   }||A| j                  || j                  vrt        j                  d| d       y| j                  |   }t	        |t
              st        dt        |       d|d      |t        |      dk7  rt        d| d	|       t        j                  |d
   g      }t        |d   d      r/|d   j                  t        j                  t        j                  fvrt        dt        |d          d|      t        j                  |d   g      }|j                  t        j                   k7  sJ |d   }n~t        j                  |g      }t        j                  |g      }| j                  |   }d|v r |d   j                  }|j#                  |      }|j                  t        j                   k7  sJ g }	|dz   }
g }|dz   }t$        j&                  j)                  |
||	|j+                         j-                               }| j.                  j1                  |       |j                  t        j                  k(  rt2        j4                  j6                  }nS|j                  t        j                  k(  rt2        j4                  j8                  }nt        d|j                   d|      t$        j&                  j)                  ||||j;                  d      j-                               }| j.                  j1                  |       d||
||	fS )a\  
        Create initializers and inputs in the graph for zero point and scale of output.
        Zero point and scale values are obtained from self.quantization_params if specified.
            parameter param_name: Name of the quantization parameter.
            return: result, scale_name, zero_point_name, scale_shape, zero_point_shape.
        z$Quantization parameters for tensor:"z" not specified)F r   r   r   Unexpected type  for r      zbQuantization parameters should contain zero point, scale, quant type. Specified values for output z: 
zero_pointscaledtypez and param_name=
quant_typer   r   zUnexpected dtype=z for param_name=)T)rI   r=   r   r   
isinstancer   	TypeErrorrf   rj   r;   nparrayhasattrr  float32float16float64astyperV   rW   r   raveltolistr&   r   r   r   r   r   reshape)rE   
param_name	use_scaleuse_zeropointzero_point_typeparamszero_point_valuesscale_valuesr  zero_point_shapezero_point_namescale_shape
scale_nameinit_zp
scale_type
init_scales                   rQ   _get_quantization_paramsz&ONNXQuantizer._get_quantization_params  s    // 5''/:TE]E]3]CJ<_`,--j9Ff&89"24<.j^ST UVV~V!1 33=,bJ 
 !#&*>)? @6'?G4w8M8MVXV`V`bdblblUm8m #3D4I3JJZ[eZh!ijj88VG_$56L%%333$\2O "- 988YK0L--j9F& w--+2259%%333$}4(*
 ++))_.>@Q@W@W@Y@`@`@b
 	

""7++#//55J2::-#//77J01C1C0DDTU_Tbcdd[[,,Z[R^RfRfglRmRtRtRvw


"":.Z+?OOOra   c           	         |j                   |   }|dk7  sJ d       |t        z   }|dz   }	|	|d||}}}
n| j                  |      \  }
}}}}g }|
r't        j                  j                  d|||g|g|	      }n| j                  ry| j                  rN|t        j                  j                  k(  r1|dz   }|dz   }t        j                  j                  d	|g|||g|	      }nU|J d
|d| d| d|        | j                  ||||      \  }}}}t        j                  j                  d|||g|g|	      }t        |||||      | j                  |<   g ||S )a  
        Given an input for a node (which is not a initializer), this function

        - add nodes to compute zero point and scale for this input if they don't exist.
        - add new QuantizeLinear node to quantize the input.

        :param node: node being quantized in NodeProto format.
        :param input_index: index of input in node.input.
        :param qType: type to quantize to.
        :param given_scale_name: if those inputs need to be quanitzed using this scale tensor.
        :param given_zp_name: if those inputs to be quantized using this zeropoint tensor.
        :param initial_type: type of the weight to quantize
        :return: List of newly created nodes in NodeProto format.
        r   z*Cannot access undefined variable in graph._QuantizeLinearNTry   r   r   DynamicQuantizeLinearzCCannot quantize input without knowing the initial type, input_name=z, input_index=z, qType=z, node=r   )r.   r	   r"  rV   rW   ro   r0   r2   r   r   r   r   r   rB   )rE   r8   input_indexr   given_scale_namegiven_zp_namer   r   r   ql_node_name
data_foundr  zp_namer   r~   qlinear_noder  zp_shapes                     rQ   _get_quantize_input_nodesz'ONNXQuantizer._get_quantize_input_nodes2  s   " ZZ,
RM!MM #;;!$55(}/H/35E}G
J484Q4QR\4]1J
GQ;;00 Z1	L {{ &&5J4J4J4P4P+P'(2
$}4#{{44+L *g6 	  $/ "",~k](SXRYY`ae`fh/ ??
ESXgs?t#{{44$W5 M 	  0>j+Wacjlq/r  ,%%%%ra   c                     || j                   v r| j                   |   S | j                  | j                  j                  |      S y r   )rB   rZ   find_quantized_value)rE   r   s     rQ   r1  z"ONNXQuantizer.find_quantized_valuex  sC    111++J77;;";;33J??ra   c
                 &   t        j                  |      }
|d|
z  z  |z  }t        j                  |j                         t         j                        }t        j                  |j                         t         j                        }||z  }||k  r~|dkD  ry||z  }||z  }|	8t        j                  d| d| d| d       dt        j                  ||      fS t        j                  d	|	 d
| d| d| d	       d|j                  |      fS d|fS )zHAdjust a single weight scale to ensure the int32 bias does not overflow.r   r  r   zIncreasing scale for weight `z` by the ratio z to ensure bias `z` has a valid scale.TzIncreased scale[z] for weight `z` by ratio F)r
  absr  itemr  r   r   r  )rE   bias_valinput_scaleweight_scaleweight_scale_dtypeweight_name	bias_nameqrangemultiplicative_epsilonidxabsmaxbias_smallest_valid_scaleinput_scale_fp64weight_scale_fp64bias_candidate_scaleratio	new_scales                    rQ   $adjust_single_weight_scale_if_neededz2ONNXQuantizer.adjust_single_weight_scale_if_needed  s:    !$:cFl$Kf$T!88K$4$4$6bjjIHH\%6%6%8

K/2CC #<<CWZ]C]-0DDE)E1I{3K=PUw W$$-;.BD RXXi7IJJJ&se>+kRWQX Y''0k1EG Y--.@AAAl""ra   r7  r8  r:  bias_tpis_per_channelreturnc                 J   |j                   syt        |      }t        j                  t        j                        }d}t        j
                  |j                  t        j                        t        j
                  |j                  dz   t        j                        z
  }	|j                  }
d}|st        j                  |j                         t        j
                  dt        j                              }t        j                  |j                         t        j
                  dt        j                              }t        j                  t        j                  |      t        j                  |            }| j                  ||||
||j                  |	|      \  }}|r|}d}||fS |j                  rlt!        |j                        dk(  rTt#        |j                  d         D ]9  }| j                  ||   |||   |
||j                  |	||	      \  }}|s3|||<   d}; ||fS )	zOChecks if the bias scale is too small and increases the weight scale if needed.)FNgqh ?r3  r   Fr   T)r>  )sizer   r
  iinfoint32r  maxr  minr  minimummaximumr4  rF  r*   shaperj   r   )rE   r7  r8  r:  rG  rH  bias_float_data
int32_infor=  r<  r9  updatedrminrmaxr?  changedrE  r   s                     rQ   #_adjust_weight_scale_for_int32_biasz1ONNXQuantizer._adjust_weight_scale_for_int32_bias  s      /8XXbhh'
!'*..

;bhhz~~XYGYacakak>ll)//::o113RXXarzz5RSD::o113RXXarzz5RSDZZtbffTl;F!%!J!J"&	"GY ($ $$# C(:(:$;q$@<--a01 #%)%N%N#A& O&LL* &O 
&" &/LO"G#  $$ra   rE  c                 (   || j                   vry| j                   |   }t        || j                  j                               }t        |j                  | j                  j                               }t        |j
                  | j                  j                               }t        |j                  | j                  j                               }||||y| j                  j                  |       | j                  j                  |       t        j                  j                  |      }|j                  }	t        j                  |t        j                  j                  |j                               }
t        j                  j#                  |
j%                  |j&                        |j                        }| j                  j)                  |       t+        || j,                  ||
|	|j                        }| j                  j)                  |       y)zCRe-quantizes the given weight initializer using the provided scale.Nr3  )quant_weight_name)rB   r   r&   r   r  r,  q_nameremove_initializerrV   numpy_helperto_arrayaxisr
  asarrayrW   tensor_dtype_to_np_dtyper   
from_arrayr  dimsr   r   rH   )rE   r:  rE  qv	weight_tp
scale_initzp_initq_weight_initweight_zero_pointr`  scale_npnew_scale_initnew_q_weights                rQ   _requantize_weightz ONNXQuantizer._requantize_weight  s    d666%%k2 djj.D.D.FG	!"--1G1G1IJ
rzz4::+A+A+CD$RYY

0F0F0HI
 2goI^

%%j1

%%m4 --66w?ww ::it{{/S/ST]TgTg/hi**55h6F6Fz6WY[YfYfg

"">2 1 ii
 	

""<0ra   c           
         || j                   v r| j                   |   j                  S | j                   |   j                  }t        || j                  j                               }t        |      }|| j                   v r| j                   |   j                  }n5|| j                  v r| j                  |      \  }	}}	}	}	nt        d| d      t        || j                  j                               }
t        |
      }| j                   |   j                  }t        || j                  j                               }|t        j                  j                  |      nd}| j                  }||j                  r|j!                         sy| j"                  t$        j&                  j(                  fv rRt        || j                  j                               }| j+                  |||||      \  }}|r| j-                  ||       |}| j/                  ||||      \  }}}}}}|| j                   vsJ t1        ||||t2        j4                  |j                  dkD  rdnd||      }|| j                   |<   |S )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        z	Expected z5 to be in quantized value map for static quantizationNr   r   )	node_type
node_qtype)rB   r\  r  r   r&   r   r   r=   r"  r;   r,  rV   r^  r_  rF   rK  r}   rH   r   r   r   rY  rn  quantize_bias_static_implr   r   Initializer)rE   r;  r   r:  betaweight_scale_nameweight_initializerr8  r   r   inputscale_initializerr7  weight_zp_nameweight_zp_initrj  rH  bias_initializer
did_updatenew_weight_scalequantized_bias_namequantized_bias_scale_namequantized_bias_zp_namebias_scale_datarp  rq  quantized_values                             rQ   quantize_bias_staticz"ONNXQuantizer.quantize_bias_static  st    000++I6=== !44[ALL)*;TZZ=S=S=UV,-?@ 111#77
CNN4333+/+H+H+T(AAqy4ijkk!-.>

@V@V@X!Y+,BC 11+>FF%ndjj6L6L6NOJXJdD--66~Fjn)))!&&%))+!!j&<&<&A&A%CC+Itzz7M7M7OP+/+S+S ,(J( ''5EF/ **9k<QUV	
%"  8 8888(%"** %%)At!	
 />  +""ra   c                 ^    || j                   v xs || j                  v xs || j                  v S )zq
        only check for value info and newly generated tensor names, initializers are checked separately
        )r+   r7   rD   r   s     rQ   contains_tensorzONNXQuantizer.contains_tensorJ  s=    
 D,,, ;t000;t999	
ra   c           	      2    | j                  ||dddd|      S )NFr  r8   indicesinitializer_use_weight_qTyperG   op_level_per_channelr`  from_subgraph_ONNXQuantizer__quantize_inputs)rE   r8   r  r  s       rQ   quantize_activationz!ONNXQuantizer.quantize_activationT  s/    %%).!&' & 
 	
ra   c           	      2    | j                  ||d||||      S )NTr  r  )rE   r8   r  rG   r  r`  r  s          rQ   quantize_weightzONNXQuantizer.quantize_weighta  s1     %%)-%!5' & 
 	
ra   c           
         g }g }	g }
g }|D ]4  }|j                   |   }|| j                  v ra| j                  |   }|j                  |j                         |	j                  |j                         |
j                  |j
                         |s4|
j                  d       |j                  d       |	j                  d       t        || j                  j                               }|| j                  r=|r;| j                  |j                  |r| j                  n| j                  ||      \  }}}n/| j                  ||r| j                  n| j                  |      \  }}}|
j                  |       |	j                  |       |j                  |       | j                  |      r| j                  j!                  |dz   | j"                  | j                  j%                               }||j                   |   }|| j&                  v rr| j&                  |   }|j)                  d      sJ d| d       |j*                  j)                  d      sJ d| d       |j*                  j,                  j.                  }n(|| j0                  v sJ d|d	       | j0                  |   }| j3                  ||| j                  |
      }| y|r| j5                  |       n|j7                  |       |d   }|j8                  dk(  rY|
j7                  |j:                         |j                  |j                   d          |	j                  |j                   d          N|
j                  |j:                  d          |j                  |j:                  d          |	j                  |j:                  d          | j<                  f| j<                  j?                  ||g||||d      \  }}}}|
j                  |d          |j                  |d          |	j                  |d          tA        d| d| jB                          |
|	||fS )a  
        Given a node, this function quantizes the inputs as follows:
            - If input is an initializer, quantize the initializer data, replace old initializer
              with new initializer
            - Else, add QuantizeLinear nodes to perform quantization
            parameter node: node being quantized in NodeProto format.
            parameter indices: input indices to quantize.
            return: (List of quantized input names,
                     List of zero point names used for input quantization,
                     List of scale names used for input quantization,
                     List of new QuantizeLinear nodes created)
        r   r$  rf   zvalue_info=z has no type.r   z is not a tensor.zshape inference failed for zF and attribute 'tensor_names' does not have any value for this tensor.r&  )NNNNr  ry   r      r   T)r  rG   r  r`  r  z!Invalid tensor name to quantize: z @graph scope)"r.   rB   r   r  r,  r\  r   r&   r   rF   quantize_weight_per_channelr*   rH   rI   quantize_initializerr  find_node_by_namer5   r(   r+   r   rf   r   r   r7   r/  r   rn   rk   r-   rZ   r  r;   r6   )rE   r8   r  r  rG   r  r`  r  scale_nameszero_point_namesquantized_input_namesr~   r'  
node_inputr  r   q_weight_namer,  r  r-  r   r)   r   quantize_input_nodesparent_quantized_input_namesparent_zero_point_namesparent_scale_namesr   s                               rQ   __quantize_inputszONNXQuantizer.__quantize_inputst  sd   .  "" c	rKK0J T555"&":"::"F""?#=#=> ''(?(?@%,,_-C-CD%,,R0""2& ''+&z4::3I3I3KLK&##(<
 88#((-I))tOdOd$		%" :>9R9R#-I))tOdOd$:6M7J &,,]; ''0"":.%%j1#zz;;!22DNNDJJDTDTDV   '!%K!8J!T%5%55%)%5%5j%A
)226:ck*Ub<cc:)77Fs+V`UaarHssF'1'B'B'L'L  *T->->> 9* H+ ,>
 (,'8'8'D+/+I+Ik4+@+@| ,J ,( ,37$**+?@%9:#7#;L''+;;)001D1DE&&|'9'9!'<=$++L,>,>q,AB)001D1DQ1GH&&|':':1'=>$++L,?,?,BC( KK11 M1M!-)="& 2 0+& &,,-I!-LM""#5a#89 ''(?(BC !#DZLP]^b^n^n]o!pqqGc	rJ %&6UJJra   c                 f   |j                   | j                  v r<| j                  |j                      }|j                  |j                  |j                  fS | j                  ||||      \  }}}t        |j                   |||t        j                  d      }|| j                  |j                   <   |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        N)	r*   rB   r\  r,  r  quantize_initializer_implr   r   rs  )	rE   r   r   rG   keep_float_weightr  r  r,  r  s	            rQ   r  z"ONNXQuantizer.quantize_initializer  s     ;;$222"66v{{CO&&''**  .2-K-KE<):.
*w

 )KK**
 1@  -gz11ra   c                    || j                   v r2| j                   |   }|j                  |j                  |j                  fS | j	                  |||||      \  }}}	t        |||	|t        j                  d       }|| j                   |<   |||	fS r   )rB   r\  r,  r   quantize_weight_per_channel_implr   r   rs  )
rE   r:  rH   channel_axisrG   r  r  r  r,  r  s
             rQ   r  z)ONNXQuantizer.quantize_weight_per_channel  s     $222"66{CO&&''**  .2-R-R|\CT.
*w
 )**
 1@  -gz11ra   c                    || j                   v rf|| j                  vrW| j                   |   }t        |j                  | j                  j                               }| j                  j                  j                  dk7  s%| j                  j                  j                  dk(  r2|0|.t        j                  j                  |      j                  dk(  sJ |dz   }| j                  j                  || j                  | j                  j                               }|H|j                  |j                  |j                  g}t        j                   j#                  d||g|      }|S ||j$                  d   k(  sJ y)a  
        Given a value (input/output) which is quantized, add a DequantizeLinear node to dequantize
        it back to float32 or float16
            parameter value_name: value to dequantize
            parameter new_nodes_list: List of new nodes created before processing current node
            return: None if there is already a DequantizeLinear node that dequantizes it
                    A DequantizeLinear node otherwise
        rS   Nr   _DequantizeLinearrz   r   )rB   rD   r   r  r&   r   rT   rV   r^  r_  rK  r  r5   r(   r\  r,  rW   ro   r-   )rE   
value_namer  rg  dqlinear_namedqlinear_nodedqlinear_inputsdequantize_nodes           rQ   _dequantize_valuezONNXQuantizer._dequantize_value8  s\    $2224KeKe9e"66zBO &o&@&@$**BXBXBZ[J zz--1AA

  ..2BBzG] ")T->->-G-G
-S-X-X\]-]]]&)<<M JJ88X\XbXbXhXhXjkM$#**#..#++#
 #'++"7"7&*}# '& "]%9%9!%<<<<ra   c                     | j                   j                         j                  D ];  }| j                  |j                        }|!| j
                  j                  |       = y)z
        Dequantize output if it is quantized
            parameter new_nodes_list: List of new nodes created before processing current node
            return: List of new nodes created
        N)r&   r(   r-   r  r*   r5   r   )rE   r-   r  s      rQ   r   z!ONNXQuantizer._dequantize_outputs_  sR     jj&&(// 	7F"44V[[AO*%%o6	7ra   c           	      F   | j                   y | j                          i }| j                   D ]q  }| j                   |   }t        |t              st	        dt        |       d|d      | j                  j                  |i       }| j                  }d|v r|d   j                  }d|v rd|v r|d   |d   }}n|t        j                  j                  k(  rt        ||j                  d         \  }}n|j                  d	|j                   d
         }|j                  d|j                   d         }	|j                  d| j"                        }
|j                  dd      }t%        |||
      \  }}t'        ||	|||
| j(                        \  }}t+        |||      ||<   t |S )Nr   r  r   )default_valr  r  r  r   rV  r   rW  	symmetricrG   F)rG   r  )r  r  r  )rJ   adjust_tensor_rangesr  r   r	  rf   tensor_quant_overridesget_per_tensor_overridesrI   r   rV   r   FLOAT8E4M3FNr   avg_stdgetrange_valueis_activation_symmetricr   r   min_real_ranger   )rE   r=   r   tdquant_overridesr  zeror  rV  rW  r  rG   qminqmaxs                 rQ   r<   z+ONNXQuantizer.calculate_quantization_paramsk  s   %!!# -- 	wK##K0Bb*-"248*E+PQ RSS"99RRS^lnRoO..J.,\:FF
/)lo.M-l;_W=Uet//<<<5j"**Q-Pe&**62>>!3DE&**62>>!3DE+//T=Y=YZ	.22>5I4Zlfop
d.tT4yRVReRefe/ATY^ku/v,/	w2 #"ra   r   )F)NN)NNN)g      ?)FFr  F)TFFr  F)FF)TF)'__name__
__module____qualname__r%   r`   rv   r   r   r   r[   r   r   r   r   r   r   r"  r/  r1  rF  r
  ndarrayr   rV   r   booltuplerY  rn  r  r  r  r  r  r  r  r  r   r<    ra   rQ   r   r   &   sZ    FMR/> fD
<+ Z
"22ARAh\7|9Px aeD&L" ##J6%ZZ6% jj6% 	6%
 !!6% 6% 
tRZZ$&&	'6%p$1c $1bjj $1T $1LF#P
	
" "
. &*"AKF2L 2@%N
7 #ra   r   )#r   numpyr
  rV   onnx.numpy_helperr   r   base_quantizerr   r   	calibrater   
onnx_modelr   quant_utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   registryr   r   r  ra   rQ   <module>r     sK        & = ! !    & (e#M e#ra   