
    pi                       d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlZd dlmZmZmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZmZmZ 	 d d
lmZ 	 d dlmZm Z  	 d dl!m"Z" dZ#dZ$dZ%dZ&dZ'dZ(dZ)dZ*dZ+dZ,i Z- e.e      D  ci c]  }  e/ e0e|       e1      s e0e|       |  c} Z2 G d de      Z3 G d de      Z4 G d de      Z5 G d de      Z6ej                  jn                   e
jp                  d      ej                  jr                   e
jp                  d       ej                  jt                   e
jp                  d!      ej                  jv                   e
jp                  d"      ej                  jx                  eej                  jz                  eej                  j|                  e iZ?ej                  jr                   e
j                  d e
j                  #       e
j                  d$e
j                  #      fej                  jn                   e
j                  d%e
j                  #       e
j                  d&e
j                  #      fej                  jv                   e
j                  d e
j                  #       e
j                  d'e
j                  #      fej                  jt                   e
j                  d(e
j                  #       e
j                  d)e
j                  #      fej                  j|                   e
j                  d e #       e
j                  d*e #      fej                  jz                   e
j                  d+e#       e
j                  d,e#      fiZEej                  jr                   e
j                  d e
j                  #       e
j                  d-e
j                  #      fej                  jn                   e
j                  d.e
j                  #       e
j                  d&e
j                  #      fej                  jv                   e
j                  d e
j                  #       e
j                  d/e
j                  #      fej                  jt                   e
j                  d0e
j                  #       e
j                  d)e
j                  #      fiZFej                  jr                   e
j                  d e
j                  #       e
j                  d&e
j                  #      fej                  jn                   e
j                  d1e
j                  #       e
j                  d2e
j                  #      fej                  jv                   e
j                  d e
j                  #       e
j                  d)e
j                  #      fej                  jt                   e
j                  d3e
j                  #       e
j                  d4e
j                  #      fej                  j|                   e
j                  d e#       e
j                  d,e#      fej                  jz                   e
j                  d5e#       e
j                  d6e#      fiZGd7d8d9ZHdcd:ZIddd;ZJd< ZK	 	 	 	 de	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dfd=ZL	 de	 dgd>ZM	 	 dc	 	 	 	 	 	 	 	 	 	 	 	 	 dhd?ZNdid@ZOdidAZPdjdBZQdkdCZR G dD dE      ZS G dF dG      ZT G dH dI      ZUdJ ZVdK ZWdL ZXdM ZYdldNZZdO Z[dmdPZ\dndQZ]dodRZ^dpdSZ_dqdTZ`drdUZadqdVZbdrdWZcdsdXZddtdYZedudZZfdvd[Zgdwd\Zhdxd]Zidxd^Zjdyd_Zkdyd`ZldydaZmdydbZny# e$ r dZY w xY w# e$ r dZdZ Y w xY w# e$ r dZ"Y w xY wc c} w )z    )annotationsN)Enum)Path)
ModelProtoTensorProtoexternal_data_helper)onnx_pb)
make_graph
make_model	make_nodemake_tensor_value_info)ReferenceEvaluator)GraphOptimizationLevelInferenceSessionSessionOptionsfloat8e4m3fn)int4uint4)to_array_extendedzonnx.quantizez0.1.0ai.onnxzcom.microsoftQuantizeLinear_QuantizeLinear_InputDequantizeLinear_DequantizeLinear_Output
_quantizedl        c                  *    e Zd ZdZdZd Zed        Zy)QuantizationModer      c                    | j                   S Nnameselfs    f/opt/services/ai/voice_agent/venv/lib/python3.12/site-packages/onnxruntime/quantization/quant_utils.py__str__zQuantizationMode.__str__D       yy    c                D    	 t         |    S # t        $ r t               w xY wr!   )r   KeyError
ValueError)modes    r&   from_stringzQuantizationMode.from_stringG   s)    	#D)) 	,	    N)__name__
__module____qualname__
IntegerOps
QLinearOpsr'   staticmethodr.    r)   r&   r   r   @   s%    JJ  r)   r   c                  *    e Zd ZdZdZd Zed        Zy)QuantizedValueTyper   r   c                    | j                   S r!   r"   r$   s    r&   r'   zQuantizedValueType.__str__S   r(   r)   c                D    	 t         |    S # t        $ r t               w xY wr!   )r8   r+   r,   )vs    r&   r.   zQuantizedValueType.from_stringV   s)    	%a(( 	,	r/   N)r0   r1   r2   InputInitializerr'   r5   r.   r6   r)   r&   r8   r8   O   s%    EK  r)   r8   c                  N    e Zd ZdZdZdZdZdZdZdZ	d Z
ed	        Zed
        Zy)	QuantTyper   r                  c                    | j                   S r!   r"   r$   s    r&   r'   zQuantType.__str__g   r(   r)   c                D    	 t         |    S # t        $ r t               w xY wr!   )r?   r+   r,   )ts    r&   r.   zQuantType.from_stringj   s(    	Q< 	,	r/   c                
   | t         j                  k(  rt        j                  S | t         j                  k(  rt        j
                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S t!        d| d      )NzUnexpected value qtype=.)r?   QInt8r   INT8QUInt8UINT8QUInt16UINT16QInt16INT16QFLOAT8E4M3FNFLOAT8E4M3FNQUInt4UINT4QInt4INT4r,   r$   s    r&   tensor_typezQuantType.tensor_typeq   s    9??"###9###$$$9$$$%%%9###$$$9***+++9###$$$9??"###24(!<==r)   N)r0   r1   r2   rJ   rL   rR   rP   rN   rV   rT   r'   r5   r.   propertyrX   r6   r)   r&   r?   r?   ^   sR    EFMFGEF   > >r)   r?   c                  *    e Zd ZdZdZd Zed        Zy)QuantFormatr   r   c                    | j                   S r!   r"   r$   s    r&   r'   zQuantFormat.__str__   r(   r)   c                D    	 t         |    S # t        $ r t               w xY wr!   )r[   r+   r,   )formats    r&   r.   zQuantFormat.from_string   s)    	v&& 	,	r/   N)r0   r1   r2   	QOperatorQDQr'   r5   r.   r6   r)   r&   r[   r[      s%    I
C  r)   r[   int8uint8int16uint16dtype   i   i  i i     i      ii  ii@   i i @  rA   zero_point_indexc                @   g }t        |      D ]  \  }}t        j                  t        |      t        j                        r%|j                  t        j                  |             n=t        |t        j                        r|j                  |       nt        d| d|       || k(  s|d   }|j                  t        j                  k(  s|j                  t        j                  k(  st        d|j                          t        |      dkD  rt        |      S |d   S )Nzarg z is not an array: rn   zzero_point cannot be r   r   )	enumeratenumpy
issubdtypetypenumberappendarray
isinstancendarray	TypeErrorrf   float32float16lentuple)rp   argsnew_argsiar;   s         r&   _check_typer      s    H$ 
C1DGU\\2OOEKKN+5==)OOAd1#%7s;<<  Aww%--'177emm+C"7y ABB
C "(ma/5?@Xa[@r)   c                   | t         v sJ d|  d       | t        j                  j                  t        j                  j                  t        j                  j
                  t        j                  j                  fv r/|dk7  rt        d|d      |j                  t        j                  k(  rt        j                  }nG|j                  t        j                  k(  rt        j                  }nt        d|j                   d      t        t!        t#        dg dgt$        j&                  j)                  d| g dg      	      t#        d
g ddg      gdt+        d|d       t+        d|d       gt+        d| d       g            }t-        |      }t/        |j1                  d ||d      d         S t         |    }	t3        | dd      \  }
}|t5        |
|      n|
}|t7        ||      n|}t        j8                  |j;                  t        j                        |z  j=                         |z         }t        j>                  ||||       t/        |j;                  |	            S )NUnexpected data type > requested. Only INT8, UINT8, INT16, and UINT16 are supported.r   z2zero_point is expected to be null for float 8 not rI   zUnexpected dtype Constant
zero_point)valuer   )Xscaler   Yqur   r   )r   r   F)reduce_range	symmetric)out) ONNX_TYPE_TO_NP_TYPE
onnx_protor   rS   FLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZNotImplementedErrorrf   rs   r|   FLOATr}   FLOAT16r,   r   r
   r   onnxhelpermake_tensorr   r   r   runget_qmin_qmax_for_qTypemaxminasarrayastyperoundclip)qTypearrr   r   lowhigh	onnx_type
onnx_modelrefrf   qminqmaxcliplowcliphigharr_fp32s                  r&   quantize_nparrayr      s-   (( 
w&de( ++--))--	  ?%(Z[eZhhi&jkk99%#))IYY%--'#++I01=>>"Bdkk>U>UVbdikmpqor>s .0LseT	 *3	4@*7ItD (UD9:

  !,3774sU)CDQGHH %U+,URWX
d$'O#dC.&*&63tT?D==#**U]]";e"C!J!J!Lz!YZ

8WhH=8??5122r)   c           	        |dkD  s|dk  rt        d| d|       t        j                  | t        j                  d| j                              } t        j
                  |t        j                  d|j                              }|.t        || t        j                  || j                        z         }|rBt        j
                  t        j                  |       t        j                  |            }| } |}||k  sJ d|  d|        t        j                  || z
  t        j                        }t        j                  |t        j                        t        j                  |t        j                        z
  }t        j                  ||z        }	|	dk\  sJ d       |	t        j                  |j                        j                  k  rFt        j                  d|j                        }	t        j                  d|j                        }
|
|	gS |r^t        j                  t        j                  ||z   t        j                  d	t        j                        z        |j                        }
n:t        j                  t        j                  || |	z  z
        |j                        }
|	j                  |j                        }	|
|	gS )
a  Calculate the scale s and zero point z for the quantization relation
    r = s(q-z), where r are the original values and q are the corresponding
    quantized values.

    r and z are calculated such that every value within [rmin,rmax] has an
    approximate representation within [qmin,qmax]. In addition, qmin <= z <=
    qmax is enforced. If the symmetric flag is set to True, the interval
    [rmin,rmax] is symmetrized to [-absmax, +absmax], where
    absmax = max(abs(rmin), abs(rmax)).

    :parameter rmin: minimum value of r
    :parameter rmax: maximum value of r
    :parameter qmin: minimum value representable by the target quantization data type
    :parameter qmax: maximum value representable by the target quantization data type
    :parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :return: zero and scale [z, s]

    r   Bqmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:, qmmax:re   zqmin=z > qmax=zscale issue      ?g       @)r,   rs   minimumrx   rf   maximumr   r   absfloat64finfotinyr   r   )rminrmaxr   r   r   min_real_rangeabsmaxdrdqr   r   s              r&   compute_scale_zpr      s    ( ax4!8]^b]ccklpkqrss
 ==u{{1DJJ?@D==u{{1DJJ?@D !4nDJJ OOPuyy		$@ww4<55htf55<	TD[	6B	T	/%++d%--2X	XBKKR EA:$}$:u{{4::&+++Ctzz2[[$**5
   TD[EKK5==,QQRZ^ZdZdJ U[[u1D%ETZZXJTZZ(r)   c                   d}| t         vr| t        j                  k(  rddlm} ddlm} |}t        d      D cg c]
  } ||       }}t        j                  |D cg c]0  }t        j                  |      rt        j                  |      r/|2 c}t        j                        }nt        d|  d      |t         | <   n| t        j                  k(  rddlm} |}|t        d	|  d
      t        j                  t         |          }	t        j                  d|      }
t        j                  ||	z  |j                         }|
|gS c c}w c c}w )ar  Calculate the scale s for a float8 type (E4M3FN).
    The function assumes the coefficient distribution and the float 8
    distribution are similar to two gaussian laws.

    :return: zero and scale [z, s]

    More details in notebook `quantization_fp8.ipynb
    <https://github.com/microsoft/onnxruntime/blob/main/docs/python/notebooks/quantization_fp8.ipynb>`_.
    Nr   )float8e4m3_to_float32r      re   zQuantization to element_type=z not implemented.zUnexpected element_type rI   )FLOAT8_DISTRIBUTIONSr   rS   onnx.numpy_helperr   #onnx.reference.custom_element_typesr   rangers   rx   isnanisinfr|   r,   r{   stdrf   )element_typer   zp_dtyper   r   r   
all_valuesfvaluesstd_f8zeror   s               r&   compute_scale_zp_float8r   :  s%    H//;333?H#H<A#JGq/2GJG[[&Tqekk!nU[[QR^T\a\i\iF <\NJ[\]]-3\*	11	1D2<.BCCYY+L9:F;;q)DKKfCII6E%=# HTs   E
E8EEc                   t        | t        j                        st        dt	        |        d      ||}nt        |       r| j                         nd}||}nt        |       r| j                         nd}t        j                  || j                        }t        j                  || j                        }t        j                  d| j                        }	|t        j                  k(  r?|rt        d      t        j                  |       }
t        ||
      \  }}	t        ||	d      S |t        j                   t        j"                  t        j$                  t        j&                  t        j(                  t        j*                  fv r_t-        |||	      \  }}t        |       rt/        ||||||      \  }}	n!t        j                  d|j                        }t        ||	d      S t1        d
| d      )a  
    Returns the zero_point and scale for the given data.

    :param data: The data for which to compute quantization parameters.
    :param quant_type: The quantization data type.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: zero point and scale
    z%Weight must be given as an array not rI   g        re   r   z1Unsupported option reduce_range=True for float 8.r   ro   r   z Unexpected value for quant_type=)ry   rs   rz   r{   ru   r~   r   r   rx   rf   r   rS   RuntimeErrorr   r   r   rK   rM   rQ   rO   rW   rU   r   r   r,   )data
quant_typer   r   r   rmin_overridermax_overrider   r   r   r   r   r   r   s                 r&   compute_data_quant_paramsr   _  s   * dEMM*?T
|1MNN  YtxxzC  YtxxzC;;t4::.D;;t4::.DKK4::.E[---RSSiio3JD
E:uqAA  -ZQZ[
dt9 0tT4Tb cJQdjj9J:uqAA
7
|1E
FFr)   c                   t        | ||||||      \  }}|t        j                  k(  rt        || ||      }	t	        |	j                  t        j                        j                         dz  dk(        ret        j                  |       }
t        d|
j                          d|
j                          d|	j                          d|	j                          d	      |||	fS |t        j                  t        j                  t        j                  t        j                   t        j"                  t        j$                  fv rt        || ||      }	|||	fS t'        d| d      )al  
    :param data: data to quantize
    :param qType: data type to quantize to.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: minimum, maximum, zero point, scale, and quantized weights

    To pack weights, we compute a linear transformation

    - when data `type == uint8` mode, from `[rmin, rmax]` -> :math:`[0, 2^{b-1}]` and
    - when data `type == int8`, from `[-m , m]` -> :math:`[-(2^{b-1}-1), 2^{b-1}-1]` where
        `m = max(abs(rmin), abs(rmax))`

    and add necessary intermediate nodes to transform quantized weight to full weight using the equation

    :math:`r = S(q-z)`, where

    - *r*: real original value
    - *q*: quantized value
    - *S*: scale
    - *z*: zero point
    rh   z+One of the quantized value is NaN data in [z, z], quantized_data in [z].zUnexpected value for qType=rI   )r   r   rS   r   anyr   rs   rb   ravelr   r   r   r   rK   rM   rQ   rO   rW   rU   r,   )r   r   r   r   r   r   r   r   r   quantized_datanp_datas              r&   quantize_datar     sZ   8 2J ((()%ujI%%ekk288:S@SHImmD)G=gkkm_Bw{{}o ^&&4&8&8&:%;2n>P>P>R=SSUW  5.00  *%ujI5.00
25';
<<r)   c                   t        |       }d}|t        ||j                         ||      }n|j                  |   }t	        |j                        }	d|	|<   g }
t        |      D ]m  }|j                  ||      }||   }||   }t        ||j                         ||      }|
j                  t        j                  |      j                  |	             o t        j                  |
|      }|r|n| j                   t         }|t        j                  j                   k(  r"t        j                         }||_        |j$                  j'                  | j$                         ||_        |j)                         j+                         j-                         |_        t0        t1        |      }|j                  |j                  k7  s!|j-                         |j-                         k7  r]t3        d|j                   d|j-                         dd  d|j-                         dd  d| j                   dt5        |      dd	  d
      |S |t        j                  j6                  t        j                  j8                  fv r|j:                  t        j<                  t        j>                  fvrt3        d| d      tA        tC        |j-                                     }t        jD                  jG                  ||| j$                  |d      }|S t        jD                  jI                  |      }t        j                  ||      j                  | j$                        }t        jJ                  jM                  ||      }|S )aG  
    Returns a quantized version of the given ONNX initializer.

    :param weight: The ONNX initializer to quantize.
    :param quant_type: The final quantized data type.
    :param zero_point: The zero-point value to use for quantization.
    :param scale: The scale value to use for quantization.
    :param axis: The quantization axis if quantizing per-channel. Defaults to None.
    :param quant_weight_name: The name of the quantized initializer.
                              If not specified, the quantized name is generated.
    :return: The quantized ONNX initializer.
    Nr   zThe initializer of shape z! could not be created, expecting 
   z, got z and shape=z
raw=   rI   zQuantized weights for z. must be 8-bit before packing as 4-bit values.T)rawre   )'tensor_proto_to_arrayr   r   shapelistr   takerw   rs   r   reshapeconcatenater#   TENSOR_NAME_QUANT_SUFFIXr   r   rS   	data_typedimsextendflattencopytobytesraw_datar   r   strrW   rU   rf   ra   rb   bytespack_bytes_to_4bitr   r   tensor_dtype_to_np_dtypenumpy_helper
from_array)weightr   r   r   axisquant_weight_nameweight_dataq_weight_datachannel_countchannel_dimsquantized_channel_data_listr   channel_datachannel_scalechannel_zero_pointquantized_channel_dataq_weight_nameq_weight_initializercheckpacked_dataquant_np_dtypes                        r&   quantize_onnx_initializerr    s4   ( (/K*.M|([5F5F5H%Q[\#))$/K--.T&(#}% 	lA&++At4L!!HM!+A%5L..0-AS&" (..u}}=S/T/\/\]i/jk	l ))*EtL):%6;;-PhOi@jMT%%222#//1)3&!!((5$1!(5(=(=(?(D(D(F(N(N(P%( &&:;E{{k///5==?mF[F[F]3]"/0A0A/BBc$,,.s34F5==?3B;O:PP[\b\h\h[iS!56t<=Q@ (   
((--t/?/?/E/EF	Fuzz5;;&??!7Ftuvv .}/D/D/FGH  ${{66}jRXR]R]_jpt6u  	 ==jIm>JRRSYS^S^_#00;;M=Yr)   c                j   | t         j                  j                  k(  rt        d      d}|rt        j                  |       }n)|r| t        v r
t        |    }nt        j                  |       }|st        d|  d      |\  }}|dkD  s|dk  r't        d| d| d|j                   d	| d
| d|        |S )z
    Return qmin and qmax, the minimum and maximum value representable by the given qType
    :parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8
    :return: qmin, qmax
    z;This function is not implemented for float 8 as not needed.Nr   r   r   r   r   z, dtype=z, reduce_range=z, symmetric=z, qType=)
r   r   rS   r   ONNX_INT_TYPE_REDUCED_RANGEgetONNX_INT_TYPE_SYMMETRIC_RANGEONNX_INT_TYPE_RANGEr,   rf   )r   r   r   qranger   r   s         r&   r   r   )  s     
&&333!"_``F,007	u ==.u5$((/07uvwwJD$ax4!86$x

|?<. Y"8E74
 	
 Mr)   c                .    t        | ||      \  }}||z
  S )z
    Helper function to get the quantization range for a type.
        parameter qType: quantization type.
        return: quantization range.
    r   )r   )r   r   r   r   r   s        r&   get_qrange_for_qTyper  I  s      )	RJD$$;r)   c                :    | dk  r| |z   n| }|dk\  xr ||k  }||fS )z
    Helper function that tries to return a normalized axis in the range [0, rank - 1].
    :parameter axis: The axis to normalize.
    :parameter rank: The tensor rank (number of dimensions).
    :return (is_valid, axis_norm)
    r   r6   )r   rank	axis_normis_valids       r&   normalize_axisr  S  s3      $axtTIA~2)d"2HYr)   c                    t        |       }|dk(  r
t               S |dz   dz  }t        |      }d}d}||dz
  k  r-| |dz      dz  dz  | |   dz  z  ||<   |dz  }|dz  }||dz
  k  r-||k  r| |   dz  ||<   |S )aB  
    Copies a source array of 8-bit values into a destination bytearray of packed 4-bit values.
    Assumes that the source values are already in the appropriate int4 range.
    :parameter src_8bit: The 8-bit element values to pack.
    :return A bytearray with every two 8-bit src elements packed into a single byte.
    r   r   r@   ri   rB   )r~   	bytearray)src_8bit	num_elemsdst_sizedstsrc_idst_is         r&   r   r   _  s     HIA~{A!#H
H
CEE )a-
	*S0Q68E?S;PQE


 )a-

 ye_s*E
Jr)   c                      e Zd ZdZg g dfdZy)QuantizedInitializerzJ
    Represents a linearly quantized weight input from ONNX operators
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        y r!   )	r#   initializerrminsrmaxszero_pointsscalesr   r   r   )
r%   r#   r%  r&  r'  r(  r)  r   r   r   s
             r&   __init__zQuantizedInitializer.__init__  sF     	&

&	,	r)   r0   r1   r2   __doc__r*  r6   r)   r&   r#  r#  }  s     r)   r#  c                       e Zd ZdZ	 	 	 	 ddZy)QuantizedValuezI
    Represents a linearly quantized value (input\output\intializer)
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        y r!   )	original_nameq_name
scale_namezp_name
value_typer   	node_type
node_qtype
scale_type)
r%   r#   new_quantized_namer2  zero_point_namequantized_value_typer   r5  r6  r7  s
             r&   r*  zQuantizedValue.__init__  sD     "($&.	"$$r)   )NNNNr+  r6   r)   r&   r.  r.    s     %r)   r.  c                      e Zd ZdZd Zy)BiasToQuantizez+
    Represents a bias to be quantized
    c                .    || _         || _        || _        y r!   )	bias_name
input_nameweight_name)r%   r>  r?  r@  s       r&   r*  zBiasToQuantize.__init__  s    "$&r)   Nr+  r6   r)   r&   r<  r<    s    'r)   r<  c                   | j                   dk(  rt        d| j                   d      | j                   dk(  r| j                  }n#| j                   dk(  r| j                  }n| j                   dk(  r| j
                  }n| j                   dk(  r| j                  }n| j                   dk(  r| j                  }n| j                   d	k(  r| j                  }n| j                   d
k(  r| j                  }nz| j                   dk(  r| j                  }n^| j                   dk(  r| j                  }nB| j                   dk(  r| j                  }n&t        d| j                   d| j                    d      | j                  |iS )z
    Convert attribute to kwarg format for use with onnx.helper.make_node.
        :parameter attribute: attribute in AttributeProto format.
        :return: attribute in {key: value} format.
    r   z
attribute z does not have type specified.r   r@   rA   rB   rC   rD   rj      	   r   z has unsupported type rI   )ru   r,   r#   r   r   srG   gfloatsintsstringstensorsgraphs)	attributer   s     r&   attribute_to_kwargrL    s;    ~~:inn%55STUU ~~	1		1		1		1		1	  	1		1	!!	1	!!	2	  :inn%55KINNK[[\]^^NNE""r)   c                t    |D cg c]  }|j                   | k(  s| }}t        |      dkD  r|d   S dS c c}w )z
    Helper function to find item by name in a list.
        parameter item_name: name of the item.
        parameter item_list: list of items.
        return: item if found. None otherwise.
    r   N)r#   r~   )	item_name	item_listitemitemss       r&   find_by_namerR    sA     (Bd499	+ATBEB5zA~58/4/ Cs   55c                R    d}t        t        |            D ]  }||   | k(  s|} |S )zC
    Helper function to return index of an item in a node list
    rn   )r   r~   )	elem_name	elem_listelem_idxr   s       r&   get_elem_indexrW    s9     H3y>" Q<9$H Or)   c                H    t         j                  j                  d| |g|      S )z
    Helper function to create a Mul node.
        parameter inputs: list of input names.
        parameter output: output name.
        parameter name: name of the node.
        return: Mul node in NodeProto format.
    Mul)r   r   r   )inputsoutputr#   s      r&   get_mul_noder\    s!     ;;  $??r)   c                l    | j                   j                  | j                  |z   | j                  z         S )zp
    Helper function to generate a identifiable filepath by concatenating the given identifier as a suffix.
    )parentjoinpathstemsuffix)filename
identifiers     r&   generate_identified_filenamerd  	  s+     ??##HMMJ$>$PQQr)   c                `   dd l }dd lm} dd l} |j                  |j
                         t        d       t        |        t        d       t        |       |j                  | |d       |j                  d       |j                  d       |j                  d	       |j                          y )
Nr   )	thresholdz
Histogram:zHistogram Edges:T)fillzTensor valueCountszTensor value V.S. Counts)sysmatplotlib.pyplotpyplotrs   set_printoptionsmaxsizeprintstairsxlabelylabeltitleshow)hist
hist_edgesri  pltrs   s        r&   
apply_plotrw    s    #ES[[1	,	$K	
	*JJtZdJ+JJ~JJxII()HHJr)   c           	     B	   ddl ddl}ddlddlmc mc m} ddlmc mc m} ddl	m
mm t        j                  d|          G fddj                        }j!                  | |      }t#        t$        j&                  j)                  |d      d	      5 }|j+                  |       ddd       j-                  d      }|j/                  d
      }	g }
t1        | j3                               D ]  }| |   }|j5                         }t7        |j9                  d|      j;                               t7        |j9                  d|      j;                               g}t=        t?        |            }|	jA                  |      }|	jA                  |      }|jC                  |	       |jE                  |	|       |jG                  |	|       |jI                  |	      }|
jK                  |        |jM                  |	tO        |
             |
D ]  }|	jQ                  |        |	jS                         }|jU                  |	       |jW                  |	|       |jY                  |	      }|	j[                  |       |	j]                         }t#        t$        j&                  j)                  |d      d      5 }|j+                  |       ddd       t$        j^                  j9                  dd      dv r|j                  ja                  |d      }|jc                         }te        |      D ]Y  }|jg                  |      }t        j                  |ji                                t        j                  |jk                                [ t#        t$        j&                  j)                  |d      d	      5 }t1        | j3                               D ]  }| |   }|j5                         }t7        |j9                  d|      j;                               t7        |j9                  d|      j;                               g}|dz   t=        t?        |            z   }|j+                  |       |j+                  d        	 ddd       y# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   yxY w)z>
    Helper function to write calibration table to files.
    r   N)CalibrationMethod
TensorDataTensorsDatazcalibration cache: c                       e Zd Z fdZy)*write_calibration_table.<locals>.MyEncoderc                N   t        |f      r|j                         S t        |j                        r'|j                         t	        |j
                        ddS t        |      r"|j                  j                  t	        |      dS j                  j                  | |      S )Nznumpy.array)r   rf   CLS)r  r   )
ry   to_dictrz   tolistr   rf   	__class__r0   JSONEncoderdefault)r%   objry  rz  r{  jsonnps     r&   r  z2write_calibration_table.<locals>.MyEncoder.default3  s    #
K89{{}$#rzz* #

s399~m\\#01"}}55CII##++D#66r)   N)r0   r1   r2   r  )ry  rz  r{  r  r  s   r&   	MyEncoderr}  2  s    	7 	7r)   r  )clszcalibration.jsonwi   highestlowestzcalibration.flatbufferswbQUANTIZATION_DEBUG0)r   1zcalibration.cache 
)6r  flatbuffersrs   5onnxruntime.quantization.CalTableFlatBuffers.KeyValuequantizationCalTableFlatBuffersKeyValue5onnxruntime.quantization.CalTableFlatBuffers.TrtTableTrtTable"onnxruntime.quantization.calibratery  rz  r{  logginginfor  dumpsopenospathjoinwriterx   Buildersortedkeysr  floatr  rP  r   r   CreateStringKeyValueStartKeyValueAddKeyKeyValueAddValueKeyValueEndrw   TrtTableStartDictVectorr~   PrependUOffsetTRelative	EndVectorTrtTableStartTrtTableAddDictTrtTableEndFinishOutputenvironGetRootAsTrtTable
DictLengthr   DictKeyValue)calibration_cachedirr  r  r  r  	json_datafiler   builderkey_value_listkeyr   d_valuesrF  r   flat_key
flat_value	key_value	main_dict	cal_tablebufdict_lenr   ry  rz  r{  r  r  s                           @@@@@r&   write_calibration_tabler  "  s   
 LLLL]]LL&'8&9:;7 7D$$ 7 

,)
<I	bggll3 23S	9 T

9 88A;D!!$'GN',,./ )"3'>>#(,,y$/4467(,,x.3356
 CK '',))%0
w'2!!':6((1	i(#)& $$Wc..AB# 3	''	23!!#I7#Wi0$$W-INN9
..
C	bggll3 9:D	A T

3 
zz~~*C0H<%%77Q?	'')x 	,A!q)ILL)LL*+	, 
bggll3 34c	: 
d+0023 		C&s+F~~'Hhll9d388:;hll8T2779:F #ICK 00EJJuJJt		
 
g L 
 
s%   $Q;6R/CR;RRRc                   | dk(  j                  t        j                        }| dk7  j                  t        j                        }|j                         }| j                  |z
  }|sy|t        |      z  t        |      z  }|dk  sJ d| d| d|        | j                  t        j                        }|||z  | |z  z   z  }|dk  j                         dk(  sJ |S )a~  Given a discrete distribution (may have not been normalized to 1),
    smooth it by replacing zeros with eps multiplied by a scaling factor
    and taking the corresponding amount off the non-zero values.
    Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf
         https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    r   Nr   zn_zeros=z, n_nonzeros=z, eps1=)r   rs   r|   sumsizer  )pepsis_zerosis_nonzerosn_zeros
n_nonzeroseps1rt  s           r&   smooth_distributionr  ~  s     Qu}}-H6//%--0KllnG'!Jw%
"33D#:Q'-
|74&QQ:88EMM"DC(Nte{222DAI??!!!Kr)   c                    t        j                  | j                         d      }t        d |j                  j
                  D              S )NF)load_external_datac              3  F   K   | ]  }t        j                  |        y wr!   )r   uses_external_data).0
intializers     r&   	<genexpr>z*model_has_external_data.<locals>.<genexpr>  s     mz#66zBms   !)r   loadas_posixr   graphr%  )
model_pathmodels     r&   model_has_external_datar    s9    IIj))+FEmUZU`U`UlUlmmmr)   c                    t               }|j                         |_        t        j                  |_        i }dg|d<   t        | j                         |fddgi|}y)z
        Generate model that applies graph optimization (constant folding, etc.)
        parameter model_path: path to the original onnx model
        parameter opt_model_path: path to the optimized onnx model
    :return: optimized onnx model
    ConstantSharingdisabled_optimizers	providersCPUExecutionProviderN)r   r  optimized_model_filepathr   ORT_ENABLE_BASICgraph_optimization_levelr   )r  opt_model_pathsess_optionkwargs_s        r&   optimize_modelr    sb     !"K+9+B+B+DK(+A+R+RK(F%6$7F !,,.jH^G_jcijAr)   c                    ddi}| j                   r8| j                   D ])  }|j                  |j                  |j                  i       + t        j
                  j                  | |       y)z>Tag the model that it went through quantization pre-processingonnx.quant.pre_processonnxruntime.quantNmetadata_propsupdater  r   r   r   set_model_props)r  r  props      r&   add_pre_process_metadatar    sZ    .0CDN(( 	:D!!488TZZ"89	:KK~6r)   c                    | j                   r2| j                   D ]#  }|j                  dk(  s|j                  dk(  s# y y)zCCheck the model whether it went through quantization pre-processingr  r  TFr  r  r   )r  r  s     r&   model_has_pre_process_metadatar    sA    (( 	Dxx33

FY8Y	 r)   c                    ddi}| j                   r8| j                   D ])  }|j                  |j                  |j                  i       + t        j
                  j                  | |       y )N
onnx.inferr  r  )r  r  r  s      r&   add_infer_metadatar    sZ    "$78N%% 	4A!!155!''"23	4KK~6r)   c                    | j                   r2| j                   D ]#  }|j                  dk(  s|j                  dk(  s# y y)Nr  r  TFr  )r  r  s     r&   model_has_infer_metadatar    s@    %% 	Auu$4G)G	 r)   c                    | j                   D cg c]   }|j                  r|j                  dk(  s|" }}t        |      dk7  rt        d      |d   j                  }|S c c}w )Nr   r   z$Failed to find proper ai.onnx domainr   )opset_importdomainr~   r,   version)r  opsetai_onnx_domainopset_versions       r&   get_opset_versionr    se    ).););m5<<SXS_S_clSlemNm
>a?@@"1%--M ns
    A A c                   t        |       }|}t        |d|      }|dk  r9|t        j                  j                  k(  rt        j                  d| d       d}n?|dk(  rt        j                  d| d       n |dk  rt        j                  d| d       d}||k7  r+t        j                  j                  | |      } t        |       } | S )	NrX      z$The original model opset version is z, which does not support quantization to float 8. Please update the model to opset >= 19. Automatically update the model to opset 19. Please verify the quantized model.r   ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Automatically update the model to opset 11. Please verify the quantized model.   )
r  getattrr   r   rS   r  warningversion_converterconvert_version&save_and_reload_model_with_shape_infer)r  weight_typer  target_opset_versionweight_quant_types        r&   update_opset_versionr    s    %e,M(]KHr/43C3C3P3PP2=/ B1 1	

  "	"	2=/ BM M	

 
	2=/ B1 1	

  "},&&66u>RS 7u=Lr)   c                    t        | d      }t        j                  j                  t	        |       t	        |             t        j
                  |j                               }t        |       |j                          |S )Nz	-inferred)	rd  r   shape_inferenceinfer_shapes_pathr   r  r  r  unlink)r  inferred_model_pathr  s      r&   load_model_with_shape_inferr    s`    6z;O**3z?C@S<TUII)2245Eu Lr)   c                   t        j                  d      5 }t        j                  |       }t	        |      j                  d      }t        j                  ||j                         d       t        |      cd d d        S # 1 sw Y   y xY w)Nz
ort.quant.)prefixz
model.onnxT)save_as_external_data)
tempfileTemporaryDirectoryr   deepcopyr   r_  r   
save_modelr  r  )r  quant_tmp_dir
model_copyr  s       r&   r  r    sl    		$	$L	9 7]]]5)
-(11,?

J$7$7$9QUV*:6	7 7 7s   A BB
c                   | j                   t        j                  j                  t        j                  j                  fv rt
        j                  j                  |       S t        d| j                   dt        | j                             )Nz&Only float type is supported. Weights z is )r   r   r   r   r   r   r   to_arrayr,   r#   type_to_name)r%  s    r&   r   r     su    !7!7!=!=z?U?U?]?] ^^  ))+66

01A1A0B$|T_TiTiGjFkl r)   c                    | dz   S )N_QuantizeLinearr6   tensor_names    r&   add_quant_suffixr*    s    ***r)   c                    | t         z   S r!   )QUANT_INPUT_SUFFIXr(  s    r&   add_quant_input_suffixr-    s    +++r)   c                    | dz   S )N_QuantizeLinear_Outputr6   r(  s    r&   add_quant_output_suffixr0    s    111r)   c                    | dz   S )N_DequantizeLinearr6   r(  s    r&   add_dequant_suffixr3  !  s    ,,,r)   c                    | dz   S )N_DequantizeLinear_Inputr6   r(  s    r&   add_dequant_input_suffixr6  %  s    222r)   c                    | t         z   S r!   )DEQUANT_OUTPUT_SUFFIXr(  s    r&   add_dequant_output_suffixr9  )  s    ...r)   )NN)FN)FNNN)r   numpy.ndarrayr   onnx.TensorProto.DataTyper   boolr   r<  r   float | Noner   r=  r   r=  returnz#tuple[numpy.ndarray, numpy.ndarray])r>  z2tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray])r   onnx.TensorProtor   r;  r   r:  r   r:  r   z
int | Noner   z
str | Noner>  r?  )FF)r   intr  r@  r>  ztuple[bool, int])r  r   r>  r  )rb  r   rc  r   r>  r   )rI   )g-C6?)r  r   )r  r   r  r   )r  r   )r  r   r>  r<  )r  r   r>  r@  )r  r   r  r?   r>  r   )r  r   r>  r   )r  r   r>  r   )r%  r   r>  r:  )r)  r   r>  r   )r>  r   )o
__future__r   r   r  r  r  enumr   pathlibr   rs   r   r   r   r   r	   r   onnx.helperr
   r   r   r   onnx.referencer   onnxruntimer   r   r   r   r   ImportErrorr   r   onnx.reference.op_runr   __producer____version__onnx_domain	ms_domainQUANT_OP_NAMEr,  DEQUANT_OP_NAMEr8  r   MODEL_SIZE_THRESHOLDr   r  ry   r  r@  r%  r   r8   r?   r[   rK   rf   rM   rQ   rO   rS   rW   rU   r   rx   rb   ra   rd   rc   r  r  r  r   r   r   r   r   r   r  r   r  r  r   r#  r.  r<  rL  rR  rW  r\  rd  rw  r  r  r  r  r  r  r  r  r  r  r  r  r   r*  r-  r0  r3  r6  r9  )ks   0r&   <module>rQ     sL   #   	      > > & Q Q - P P@?
7 	 , $2 ' !  474Dqq
SZ[fhiSjloHpQ'*qt  #> #>L$   V!4  +%++g"6  +%++g"6!!;5;;x#8''  %    ;5;;q#DkekkRU]b]h]hFi"j+%++d%**"E{u{{SV^c^h^hGi!j!!KEKK$FTYafamamHn#o  ;5;;vU[[#I;5;;W\didodoKp"q  ;5;;q#>BV[@\"]+%++b"={u{{1TX?Y!Z    ;5;;q#DkekkRU]b]h]hFi"j+%++d%**"E{u{{SV^c^h^hGi!j!!KEKK$FTYafamamHn#o  ;5;;vU[[#I;5;;W\didodoKp"q	!    ;5;;q#DkekkRU]b]h]hFi"j+%++c"DkekkRT\a\f\fFg!h!!KEKK$FTYafamamHn#o  ;5;;vU[[#I;5;;W\didodoKp"q  ;5;;q#={u{{1TX?Y"Z+%++b"={u{{1TX?Y!Z  )+ A 13h<~"R #'"&"&;G
;G);G ;G 	;G
 !;G  ;G  ;G );G~ hl:=7:=D $(L L )L  L  	L 
 L  "L  L ^@	< >% %8' '"#J0@R$Yx2n
k 77!H7+,2-3/_   L  DE  $ rsB   ] ] (]! ]/$]/]]	]]!],+],