support multiheadattention int8 #3940

Open · wants to merge 58 commits into master
Commits
2a5a296
feat(tools/quantize): support toml
tpoisonooo Jun 13, 2022
e8ad914
apply code-format changes
tpoisonooo Jun 13, 2022
77e6546
feat(tools/quantize): add .ini parser
tpoisonooo Jun 15, 2022
8e2f806
apply code-format changes
tpoisonooo Jun 15, 2022
146b8ba
improvement(tools/quantize): add ini config
tpoisonooo Jun 16, 2022
12f075f
Merge branch 'master' of https://github.com/tencent/ncnn into ncnn-in…
tpoisonooo Jun 16, 2022
f719ee7
Merge branch 'ncnn-int8-toml' of https://github.com/tpoisonooo/ncnn i…
tpoisonooo Jun 16, 2022
9863b26
improvement(tools/quantize): refactor code
tpoisonooo Jun 16, 2022
1612caf
apply code-format changes
tpoisonooo Jun 16, 2022
be66fac
test(tools/quantize/ncnn2int8): test quant sqznet
tpoisonooo Jun 16, 2022
ba6640d
improvement(CMakeLists): downgrade to cxx11
tpoisonooo Jun 16, 2022
d106fc0
apply code-format changes
tpoisonooo Jun 16, 2022
fab112d
Update CMakeLists.txt
tpoisonooo Jun 16, 2022
77cf07a
Update ncnn2table.cpp
tpoisonooo Jun 16, 2022
9262515
Merge branch 'ncnn-int8-toml' of https://github.com/tpoisonooo/ncnn i…
tpoisonooo Jun 16, 2022
9d473f5
fix(CI): remove cxx17 grammar
tpoisonooo Jun 16, 2022
181714e
fix(tools/quantize): typo
tpoisonooo Jun 16, 2022
b32dd56
docs(ncnn2int8): add ini description
tpoisonooo Jun 17, 2022
12bef90
feat(ncnn2int8): parse mha
tpoisonooo Jun 17, 2022
c7641ca
feat(src/layer): add mha int8
tpoisonooo Jun 17, 2022
f20318b
apply code-format changes
tpoisonooo Jun 17, 2022
4de1aff
feat(src/layer): add mha int8
tpoisonooo Jun 18, 2022
acedd44
Merge branch 'master' of https://github.com/tencent/ncnn into support…
tpoisonooo Jun 18, 2022
9d743fe
Merge branch 'support-mha-int8' of https://github.com/tpoisonooo/ncnn…
tpoisonooo Jun 18, 2022
2428661
feat(src/layer): mha int8 input transform
tpoisonooo Jun 18, 2022
5305e50
apply code-format changes
tpoisonooo Jun 18, 2022
8d276f4
feat(src/layer/multiheadattention): add log_int_softmax
tpoisonooo Jun 19, 2022
a560617
Merge branch 'support-mha-int8' of https://github.com/tpoisonooo/ncnn…
tpoisonooo Jun 19, 2022
75061d9
apply code-format changes
tpoisonooo Jun 19, 2022
30d6388
feat(src/layer): log_int_softmax
tpoisonooo Jun 21, 2022
09db0c5
Merge branch 'support-mha-int8' of https://github.com/tpoisonooo/ncnn…
tpoisonooo Jun 21, 2022
33eaa02
apply code-format changes
tpoisonooo Jun 21, 2022
25ca6bc
fix(tools/quantize): value_get template specialization
tpoisonooo Jun 21, 2022
fe6ee36
apply code-format changes
tpoisonooo Jun 21, 2022
cb3ac68
fix(quantize/ncnn2int8): convert weight missing clone
tpoisonooo Jun 21, 2022
de0e76a
fix(multiheadattention.cpp): load bias
tpoisonooo Jun 21, 2022
449f9cb
fix(src/layer): model load size error
tpoisonooo Jun 22, 2022
3c96faa
fix(net_quantize.cpp): weight scale
tpoisonooo Jun 23, 2022
c81850e
apply code-format changes
tpoisonooo Jun 23, 2022
83e3368
fix(lis): scale error
tpoisonooo Jun 24, 2022
58df666
fix(mha): single opr precision
tpoisonooo Jun 25, 2022
b958cab
improvement(mha): fp32 version using fake quant
tpoisonooo Jun 25, 2022
0843acf
fix(mha): remove LIS and get good precision
tpoisonooo Jun 25, 2022
527b03a
Merge branch 'support-mha-int8' of https://github.com/tpoisonooo/ncnn…
tpoisonooo Jun 25, 2022
aa6e791
apply code-format changes
tpoisonooo Jun 25, 2022
bdf52ab
improvement(mha): quantize softmax output
tpoisonooo Jun 26, 2022
1bf72dc
apply code-format changes
tpoisonooo Jun 26, 2022
9258065
improvement(benchmark): clean code
tpoisonooo Jun 26, 2022
6c7d992
docs(operators.md): update mha
tpoisonooo Jun 26, 2022
3f1844b
revert(src/layer/mha): do not quantize softmax
tpoisonooo Jun 27, 2022
240137b
improvement(test): add mha test
tpoisonooo Jun 29, 2022
14d45ab
apply code-format changes
tpoisonooo Jun 29, 2022
c9f430f
fix(CI): rebase code
tpoisonooo Jul 28, 2022
66ed718
Merge branch 'support-mha-int8' of https://github.com/tpoisonooo/ncnn…
tpoisonooo Jul 28, 2022
435e380
apply code-format changes
tpoisonooo Jul 28, 2022
497dbd7
fix(CI): test mha exceeding
tpoisonooo Aug 1, 2022
5c5a586
fix(src/layer/mha): miss convert weight to int8
tpoisonooo Aug 3, 2022
8c44ccf
apply code-format changes
tpoisonooo Aug 3, 2022
4 changes: 4 additions & 0 deletions benchmark/benchncnn.cpp
@@ -320,6 +320,10 @@ int main(int argc, char** argv)

benchmark("vision_transformer", ncnn::Mat(384, 384, 3), opt);

benchmark("FastestDet", ncnn::Mat(352, 352, 3), opt);

benchmark("vision_transformer_int8", ncnn::Mat(384, 384, 3), opt);

#if NCNN_VULKAN
delete g_blob_vkallocator;
delete g_staging_vkallocator;
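
To try the new entry locally, the standard benchncnn invocation should work. A minimal sketch, assuming the binary was built under `build/benchmark` and is run from the `benchmark/` directory where the `.param` files live:

```shell
cd benchmark
# loop count, num threads, powersave, gpu device (-1 = CPU only), cooling down
../build/benchmark/benchncnn 8 4 0 -1 0
```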
146 changes: 146 additions & 0 deletions benchmark/vision_transformer_int8.param

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions docs/developer-guide/operators.md
@@ -1054,9 +1054,10 @@ y = affine(out)
| 0 | embed_dim | int | 0 | |
| 1 | num_head | int | 1 | |
| 2 | weight_data_size| int | 0 | |
| 3 | int8_scale_term| int | 0 | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
| weight | type | shape | description |
| ------------- | ----- | --- | --------------------- |
| q_weight_data | float/fp16/int8 | [weight_data_size] |
| q_bias_data | float | [embed_dim] |
| k_weight_data | float/fp16/int8 | [weight_data_size] |
@@ -1065,6 +1066,14 @@ y = affine(out)
| v_bias_data | float | [embed_dim] |
| out_weight_data| float/fp16/int8 | [weight_data_size] |
| out_bias_data | float | [embed_dim] |
| q_input_scale | float | [1] |
| k_input_scale | float | [1] |
| v_input_scale | float | [1] |
| q_weight_scales | float | [embed_dim] |
| k_weight_scales | float | [embed_dim] |
| v_weight_scales | float | [embed_dim] |
| internal_scales | float | [5] | scales for xq/xk/xv/before_softmax/before_output |
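
As a rough, illustrative sketch (inferred from the scale layout above, not copied from the implementation), each int8 projection combines its per-tensor input scale with its per-channel weight scales and then requantizes with the matching `internal_scales` entry; k and v follow the same pattern, and the last two internal scales cover the activations before softmax and before the output projection:

```
xq_int8    = round(x * q_input_scale)
acc_int32  = xq_int8 * q_weight_int8                          (int8 gemm)
xq_fp32    = acc_int32 / (q_input_scale * q_weight_scales) + q_bias_data
xq_requant = round(xq_fp32 * internal_scales[0])              (internal scale 0 = xq)
```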


# MVN
```
28 changes: 26 additions & 2 deletions docs/how-to-use-and-FAQ/quantized-int8-inference.md
@@ -20,7 +20,7 @@ Some imagenet sample images here https://github.com/nihui/imagenet-sample-images

```shell
find images/ -type f > imagelist.txt
./ncnn2table mobilenet-opt.param mobilenet-opt.bin imagelist.txt mobilenet.table mean=[104,117,123] norm=[0.017,0.017,0.017] shape=[224,224,3] pixel=BGR thread=8 method=kl format=txt
```

* mean and norm are the values you passed to ```Mat::substract_mean_normalize()```
@@ -35,6 +35,7 @@ find images/ -type f > imagelist.txt
* pixel is the pixel format of your model, image pixels will be converted to this type before ```Extractor::input()```
* thread is the CPU thread count that could be used for parallel inference
* method is the post training quantization algorithm, kl and aciq are currently supported
* format is the output file format for the quantization parameters; choose `ini` or `txt` (`txt` is the default, see the `ini` example below)
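
For example, a run that writes the table in `ini` format instead (same model and preprocessing options as above, only the last argument changes) would look like:

```shell
./ncnn2table mobilenet-opt.param mobilenet-opt.bin imagelist.txt mobilenet.table mean=[104,117,123] norm=[0.017,0.017,0.017] shape=[224,224,3] pixel=BGR thread=8 method=kl format=ini
```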

If your model has multiple input nodes, you can use multiple list files and other parameters

@@ -60,7 +61,7 @@ mobilenet.load_model("mobilenet-int8.bin");

## mixed precision inference

Before quantizing your model, comment out the layer's weight scale line in the `txt`-format table file; that layer will then run float32 inference

```
conv1_param_0 156.639840536
```

to

```
#conv1_param_0 156.639840536
```

If you are using the `ini` format, remove the layer's whole quantization parameter section instead. For example, change:

```
[conv0]
type = "Conv"
weight = [ 156.639840536 ]
input_scale = 1.23

[fire]
type = "Gemm"
weight = [ 156.639840536 ]
input_scale = 1.23
```

to

```
[fire]
type = "Gemm"
weight = [ 156.639840536 ]
input_scale = 1.23
```