
    pi                         d dl Z d dlZd dlZddlmZmZmZmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ d Zd	 Zd
 Z G d de      Z G d de      Zy)    N   )TENSOR_NAME_QUANT_SUFFIXQuantizedValueQuantizedValueTypeattribute_to_kwargfind_by_nameget_mul_node	ms_domain   )QuantOperatorBase)	QOpMatMul)QDQOperatorBasec                     | j                   D cg c]  }|j                  dk(  s| }}|r%t        j                  j	                  |d         dkD  S yc c}w )NtransBr   F	attributenameonnxhelperget_attribute_value)	gemm_nodeattrtransB_attributes      i/opt/services/ai/voice_agent/venv/lib/python3.12/site-packages/onnxruntime/quantization/operators/gemm.pyis_B_transposedr      sS    )2)<)<V		X@UVV{{../?/BCaGG	 Ws
   AAc                     | j                   D cg c]  }|j                  dk(  s| }}|r"t        j                  j	                  |d         S yc c}w )Nbetar         ?r   r   r   beta_attributes      r   get_betar!      sL    '0':':Rtdii6>QdRNR{{..~a/@AA	 Ss
   AAc                 x    | j                   D cg c]  }|j                  dk(  s| }}|r
d|d   _        yc c}w )Nr   r   r   )r   r   fr   s      r   set_default_betar$   $   s?    '0':':Rtdii6>QdRNR!q	 Ss   77c                   (     e Zd Z fdZ fdZ xZS )QLinearGemmc                 &    t         |   ||       y Nsuper__init__selfonnx_quantizer	onnx_node	__class__s      r   r+   zQLinearGemm.__init__-       3    c           	      &   | j                   }|j                  dk(  sJ | j                  j                  |j                  d         \  }}}}}| j                  j                  |j                  d         r| j                  j                         r| j                  j                  |dg      \  }}}}	| j                  j                  |j                  d   | j                  j                  t        |      rdnd      }
|j                  |
d          |j                  |
d          |j                  |
d          n| j                  j                  |dg      \  }}}}	| j                  j                  |dg| j                  j                        \  }}}}|j                  |       |j                  |       |j                  |       |	j                  |       |r|t         | E         S d}t%        |j                        dk(  r| j                  j                  |j                  d         st         | E         S | j                  j'                  |j                  d   |j                  d   |j                  d   t)        | j                               }|j                  d   t*        z   }|j,                  r|j,                  dz   nd}i }|j.                  D ],  }|j,                  d	k7  s|j1                  t3        |             . t4        |d
<   g }t7        d      D ]  }|j                  ||   ||   ||   g       ! |j                  |||g       t9        j:                  j<                  d||g|fi |}|	j                  |       t?        |j                  d   |||t@        jB                  |j                  | j                  j                        }|| j                  jD                  |j                  d   <   | j                  xjF                  |	z  c_#        y )NGemmr   r   r   )reduce_range    _quantr   domainQGemm)	node_type
node_qtype)$nodeop_type	quantizer_get_quantization_paramsoutputis_input_a_initializerinputis_per_channelquantize_activationquantize_weight_per_channelweight_qTyper   appendquantize_weightr5   extendr*   quantizelenquantize_bias_staticr!   r   r   r   updater   r
   ranger   r   	make_noder   r   Inputquantized_value_map	new_nodes)r-   r=   
data_foundoutput_scale_nameoutput_zp_name_quantized_input_nameszero_point_namesscale_namesnodesquant_weight_tuplequantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightquantized_bias_nameqgemm_output
qgemm_namekwargsr   qgemm_inputsi
qgemm_nodeq_outputr0   s                           r   rK   zQLinearGemm.quantize0   s   yy||v%%% NN33DKKNC	
 >>00A?DNNDaDaDc 224!=% !%!K!K

1++$T*"
 "(();A)>?##$6q$9:1!45 224!=%  ..taSt~~GbGb.c,'"!(()EF##$;<12LL&2:7#%% tzz?a>>88AGw')) #'.."E"E

1tzz!}djjmXdii=P# {{1~(@@-1YYTYY)B
 	=I~~'0;<	= %x q 	aA!6q!9;q>K[\]K^ _`	a 	02C^TU[[**7L<.R\g`fg
Z  "KKN$$ll~~22
 >F**4;;q>:  E) r2   __name__
__module____qualname__r+   rK   __classcell__r0   s   @r   r&   r&   ,   s    4\* \*r2   r&   c                   $     e Zd Z fdZd Z xZS )QDQGemmc                 &    t         |   ||       y r(   r)   r,   s      r   r+   zQDQGemm.__init__   r1   r2   c           	         | j                   }|j                  dk(  sJ | j                  j                  |j                  d          | j
                  s(| j                  j                  |j                  d          | j                  j                  |j                  d   t        |      rdnd      \  }}|r*| j                  j                  |j                  d   |       n(| j                  j                  |j                  d          t        |j                        dk(  r| j                  j                  |j                  d         ry| j                  j                  |j                  |j                  d   |j                  d   |j                  d   t        | j                                t!        | j                          y t#        j$                  d| j                   j                   d       y y )	Nr4   r   r   )default_axisr7   r   zBias of Gemm node 'zC' is not constant. Please exclude this node for better performance.)r=   r>   r?   quantize_activation_tensorrC   disable_qdq_for_node_outputrA   is_tensor_per_channelr   "quantize_weight_tensor_per_channelquantize_weight_tensorrL   rB   quantize_bias_tensorr   r!   r$   loggingwarning)r-   r=   is_weight_per_channelweight_axiss       r   rK   zQDQGemm.quantize   so   yy||v%%%11$**Q-@//NN55dkk!nE-1^^-Q-QJJqM_T-B .R .
*{ !NN==djjm[YNN11$**Q-@tzz?a~~44TZZ]C33IItzz!}djjmTZZ]HUYU^U^L_ !+)$))..)99|}  r2   ri   rn   s   @r   rp   rp      s    4r2   rp   )rz   numpynpr   quant_utilsr   r   r   r   r   r	   r
   base_operatorr   matmulr   qdq_base_operatorr   r   r!   r$   r&   rp    r2   r   <module>r      sR         -  .`*) `*Fo r2   