Merge pull request #33 from deepika7497/training

PR to merge changes for input sparsity for both digital and analog versions
Aayush-Ankit · Jun 15, 2020 · 942cc37 · 942cc37
2 parents 4a5ab26 + af3516a
commit 942cc37
Show file tree

Hide file tree

Showing 12 changed files with 504 additions and 97 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,6 @@
 *.pyc
 *.sw*
-
+*.log
 /test/testasm/
 /test/traces/
 /test/coreMvm_test/

diff --git a/how_to_run.md b/how_to_run.md
@@ -82,7 +82,9 @@ cp -R <example> puma-simulator/test/testasm/
 
 #### 6.1 - Setup config file :
 
-Config file - ```puma-simulator/include/config.py```.
+Choose the config file that matches your model from ```puma-simulator/include/example-configs/<config file name>```.
+For example, for an MLP model use ```config-mlp.py```.
+Copy the chosen file to ```puma-simulator/include/``` and rename it to ```config.py```.
 
 Update ```num_tile_compute``` in config file based on the number of tiles generated in your ```<example>``` model.
 

diff --git a/include/config.py b/include/config.py
@@ -7,6 +7,13 @@
 xbar_record = 1
 inference = 1
 training = not(inference)
+sparse_opt = 1 # Flag for sparsity optimisation (set it to 0 for dense-only computations)
+
+## Variable to define the type of MVMU
+# One of "Analog", "Digital_V1" or "Digital_V2" 
+# Digital_V1 has compressed inputs (Data+Offset style)
+# Digital_V2 has uncompressed inputs (Skips computations for 0 activation)
+MVMU_ver = "Analog"
 
 ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 num_bits = 16
@@ -30,7 +37,9 @@
 data_width = num_bits # (in bits)
 xbdata_width = data_width # (in bits)
 instrn_width = 48 # (in bits)
-
+# Input and Weight parameters
+input_prec = 16
+weight_width = 16
 # Change here - Specify the IMA parameters here
 xbar_bits = 2
 num_matrix = 2 # each matrix is 1-fw logical xbar for inference and 1-fw, 1-bw, and 1 delta logical xbar for training. Each logical xbar for inference is 8-fw physical xbar and for training  8-fw, 8-bw and 16-delta physical xbars.

diff --git a/include/constants.py b/include/constants.py
@@ -2,6 +2,7 @@
 ## It also holds power, area and latency numbers of different component used in DPE design
 import config as cfg
 import math
+import constants_digital as digi_param
 # Limits the number of cycles an IMA runs in case it doesn't halt
 infinity = 100000
 
@@ -145,42 +146,47 @@
                  '16': 1.67 * 10**(-7)}
 
 # ADC - Discuss exact values with ISSAC authors
+# ADC Values for including sparsity
 adc_lat_dict = {'1' : 12.5,
                 '2' : 25,
+                '3' : 37.5,
                 '4' : 50,
-		'5' : 62.5,			
-		'6' : 75,
-		'7' : 87.5,
+                '5' : 62.5,
+                '6' : 75,
+                '7' : 87.5,
                 '8' : 100,
 		'9' : 112.5,
                 '16': 200}
 
 adc_pow_dyn_dict = {'1' : 0.225,
                     '2' : 0.45,
+                    '3' : 0.675,
                     '4' : 0.9,
-		    '5' : 1.125,			
-		    '6' : 1.35,
-		    '7' : 1.575,
+                    '5' : 1.125,
+                    '6' : 1.35,
+                    '7' : 1.575,
                     '8' : 1.8,
 		    '9' : 2.025,
                     '16': 3.6}
 
 adc_pow_leak_dict = {'1' : 0.025,
                      '2' : 0.05,
+                     '3' : 0.075,
                      '4' : 0.1,
-		     '5' : 0.125,			
-		     '6' : 0.150,
-		     '7' : 0.175,
+                     '5' : 0.125,
+                     '6' : 0.15,
+                     '7' : 0.175,
                      '8' : 0.2,
 		     '9' : 0.225,
                      '16': 0.4}
 
 adc_area_dict = {'1' : 0.0012,
                  '2' : 0.0012,
+                 '3' : 0.0012,
                  '4' : 0.0012,
-		 '5' : 0.0012,			
-		 '6' : 0.0012,
-		 '7' : 0.0012,
+                 '5' : 0.0012,
+                 '6' : 0.0012,
+                 '7' : 0.0012,
                  '8' : 0.0012,
 		 '9' : 0.0012,
                  '16': 0.0012}
@@ -353,7 +359,12 @@
 
 # Chosen latency based on config file - only for components whose latency is parameter dependent
 #xbar_lat = xbar_lat_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)]
-xbar_ip_lat = xbar_ip_lat
+xbar_ip_lat_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0}
+if cfg.MVMU_ver == "Analog":
+      for key, value in xbar_ip_lat_dict.items():
+            xbar_ip_lat_dict[key] = xbar_ip_lat
+else:
+      xbar_ip_lat_dict = digi_param.Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
 xbar_op_lat = xbar_op_lat
 xbar_rd_lat = xbar_rd_lat
 xbar_wr_lat = xbar_wr_lat
@@ -365,8 +376,11 @@
 instrnMem_lat =  instrnMem_lat_dict[str(cfg.instrnMem_size)]
 dataMem_lat =  dataMem_lat_dict[str(cfg.dataMem_size)]
 
-# Chosen area based on config file - only for components whose latency is parameter dependent
-xbar_area = xbar_area_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)]
+# Chosen area based on config file - only for components whose area is parameter dependent
+if cfg.MVMU_ver == "Analog":
+        xbar_area = xbar_area_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)]
+else:
+        xbar_area = digi_param.Digital_xbar_area_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
 dac_area = dac_area_dict [str(cfg.dac_res)]
 adc_area = adc_area_dict [str(cfg.adc_res)]
 xbar_inMem_area = xbar_inMem_area_dict[str(cfg.xbar_size)]
@@ -388,8 +402,20 @@
 instrnMem_pow_dyn =  instrnMem_pow_dyn_dict[str(cfg.instrnMem_size)] * math.sqrt(8) #area scaling for 8 bytes per instruction
 dataMem_pow_dyn =  dataMem_pow_dyn_dict[str(cfg.dataMem_size)]
 
+# Energy
+xbar_ip_energy_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0}
+if cfg.MVMU_ver == "Analog":
+        for key,value in xbar_ip_energy_dict.items():
+                xbar_ip_energy_dict[key] = xbar_ip_lat*xbar_ip_pow_dyn
+else:
+        xbar_ip_energy_dict = digi_param.Digital_xbar_energy_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
+print('xbar_ip_energy_dict', xbar_ip_energy_dict)
+
 # Chosen leak_power based on config file - only for components whose latency is parameter dependent
-xbar_pow_leak = 0
+if cfg.MVMU_ver == "Analog":
+        xbar_pow_leak = 0
+else:
+        xbar_pow_leak = digi_param.Digital_xbar_pow_leak_dict[str(cfg.xbar_size)]
 dac_pow_leak = dac_pow_leak_dict [str(cfg.dac_res)]
 adc_pow_leak = adc_pow_leak_dict [str(cfg.adc_res)]
 xbar_inMem_pow_leak = xbar_inMem_pow_leak_dict[str(cfg.xbar_size)]

diff --git a/include/constants_digital.py b/include/constants_digital.py
@@ -0,0 +1,175 @@
+## This file contains the latency, area, energy and leakage-power numbers of Digital MVMUs for two versions (Digital_V1 and Digital_V2)
+Digital_xbar_lat_dict = {'Digital_V1': {'32': { '0':  130, # first indexed by version then by xbar_size and then by % sparsity
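+                                                '10': 114, # V1: (4n+2)*T, V2: (3n+2+xbar_size)*T, where n = floor(xbar_size*(1 - sparsity)) is the number of non-zero inputs (T appears to be 1 time unit here - TODO confirm)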
+                                                '20': 102, 
+                                                '30': 90, 
+                                                '40': 78, 
+                                                '50': 66, 
+                                                '60': 50, 
+                                                '70': 38, 
+                                                '80': 26, 
+                                                '90': 14},      
+                                        '64': { '0' : 258, 
+                                                '10': 230,
+                                                '20': 206, 
+                                                '30': 178, 
+                                                '40': 154, 
+                                                '50': 130, 
+                                                '60': 102, 
+                                                '70': 78, 
+                                                '80': 50, 
+                                                '90': 26},      
+                                        '128':{ '0' : 514,
+                                                '10': 462, 
+                                                '20': 410, 
+                                                '30': 358, 
+                                                '40': 306, 
+                                                '50': 258, 
+                                                '60': 206, 
+                                                '70': 154, 
+                                                '80': 102, 
+                                                '90': 50},     
+                                        '256':{ '0' : 1026, 
+                                                '10': 922,
+                                                '20': 818, 
+                                                '30': 718, 
+                                                '40': 614, 
+                                                '50': 514, 
+                                                '60': 410, 
+                                                '70': 306, 
+                                                '80': 206, 
+                                                '90': 102}},
+                         'Digital_V2': {'32' :{ '0' : 130,
+                                                '10': 118,
+                                                '20': 109, 
+                                                '30': 100, 
+                                                '40': 91, 
+                                                '50': 82, 
+                                                '60': 70, 
+                                                '70': 61, 
+                                                '80': 52, 
+                                                '90': 43},  
+                                        '64' :{ '0' : 258,
+                                                '10': 237,  
+                                                '20': 219, 
+                                                '30': 198, 
+                                                '40': 180, 
+                                                '50': 162, 
+                                                '60': 141, 
+                                                '70': 123, 
+                                                '80': 102, 
+                                                '90': 84},
+                                        '128':{ '0' : 514,
+                                                '10': 475,  
+                                                '20': 436, 
+                                                '30': 397, 
+                                                '40': 358, 
+                                                '50': 322, 
+                                                '60': 283, 
+                                                '70': 244, 
+                                                '80': 205, 
+                                                '90': 166},
+                                        '256':{ '0' : 1026,
+                                                '10': 948,  
+                                                '20': 870, 
+                                                '30': 795, 
+                                                '40': 717, 
+                                                '50': 642, 
+                                                '60': 564, 
+                                                '70': 486, 
+                                                '80': 411, 
+                                                '90': 333}}}
+
+Digital_xbar_area_dict = {'Digital_V1': { '32' : 0.16977,   # first indexed by version then by xbar_size
+                                          '64' : 0.27701,
+                                          '128': 1.74020,
+                                          '256': 7.29481},
+                          'Digital_V2': { '32' : 0.16949,  
+                                          '64' : 0.27645,
+                                          '128': 1.73908,
+                                          '256': 7.29257}}
+
+Digital_xbar_energy_dict = {'Digital_V1':{'32':{'0' : 5261.43744,  # first indexed by version then by xbar_size and then by % sparsity (same keys as Digital_xbar_lat_dict)
+                                                '10': 4613.872832, # Proportional to the latency: (4n+2)*T for V1 and (3n+2+xbar_size)*T for V2
+                                                '20': 4128.199376, # in pJ
+                                                '30': 3642.52592, 
+                                                '40': 3156.852464, 
+                                                '50': 2671.179008, 
+                                                '60': 2023.6144, 
+                                                '70': 1537.940944, 
+                                                '80': 1052.267488, 
+                                                '90': 566.594032},      
+                                          '64':{'0' : 20844.00864, 
+                                                '10': 18581.86252,
+                                                '20': 16642.88014, 
+                                                '30': 14380.73402, 
+                                                '40': 12441.75163, 
+                                                '50': 10502.76925, 
+                                                '60': 8240.623131, 
+                                                '70': 6301.640745, 
+                                                '80': 4039.494628, 
+                                                '90': 2100.512242},      
+                                        '128':{ '0' : 83018.14464,
+                                                '10': 74619.39346, 
+                                                '20': 66220.64228, 
+                                                '30': 57821.8911, 
+                                                '40': 49423.13992, 
+                                                '50': 41670.44653, 
+                                                '60': 33271.69535, 
+                                                '70': 24872.94417, 
+                                                '80': 16474.19299, 
+                                                '90': 8075.441812},     
+                                        '256':{ '0' : 331639.0958, 
+                                                '10': 298022.5268,
+                                                '20': 264405.9578, 
+                                                '30': 232082.3337, 
+                                                '40': 198465.7647, 
+                                                '50': 166142.1407, 
+                                                '60': 132525.5717, 
+                                                '70': 98909.00265, 
+                                                '80': 66585.3786, 
+                                                '90': 32968.80959}},
+                            'Digital_V2':{'32':{'0' : 4466.744263,
+                                                '10': 4053.765767,
+                                                '20': 3744.031895, 
+                                                '30': 3434.298023, 
+                                                '40': 3124.564151, 
+                                                '50': 2814.830279, 
+                                                '60': 2401.851783, 
+                                                '70': 2092.117911, 
+                                                '80': 1782.384039, 
+                                                '90': 1472.650167},  
+                                          '64':{'0' : 17654.27322,
+                                                '10': 16216.06481,  
+                                                '20': 14983.31474, 
+                                                '30': 13545.10633, 
+                                                '40': 12312.35626, 
+                                                '50': 11079.6062, 
+                                                '60': 9641.397787, 
+                                                '70': 8408.647721, 
+                                                '80': 6970.439311, 
+                                                '90': 5737.689245},
+                                        '128':{ '0' : 70237.24474,
+                                                '10': 64904.19392,  
+                                                '20': 59571.14309, 
+                                                '30': 54238.09226, 
+                                                '40': 48905.04144, 
+                                                '50': 43982.22529, 
+                                                '60': 38649.17446, 
+                                                '70': 33316.12363, 
+                                                '80': 27983.07281, 
+                                                '90': 22650.02198},
+                                        '256':{ '0' : 280471.5471,
+                                                '10': 259128.5,  
+                                                '20': 237785.453, 
+                                                '30': 217263.2925, 
+                                                '40': 195920.2454, 
+                                                '50': 175398.0849, 
+                                                '60': 154055.0379, 
+                                                '70': 132711.9909, 
+                                                '80': 112189.8303, 
+                                                '90': 90846.78326}}}
+Digital_xbar_pow_leak_dict = {  '32' :5.575928889,          #in mW 
+                                '64' :12.82466678,
+                                '128':40.24037556,
+                                '256':120.2098611}
diff --git a/include/example-configs/config-cnn.py b/include/example-configs/config-cnn.py
@@ -7,6 +7,13 @@
 xbar_record = 1
 inference = 1
 training = not(inference)
+sparse_opt = 1 # Flag for sparsity optimisation (set it to 0 for dense-only computations)
+
+## Variable to define the type of MVMU
+# One of "Analog", "Digital_V1" or "Digital_V2" 
+# Digital_V1 has compressed inputs (Data+Offset style)
+# Digital_V2 has uncompressed inputs (Skips computations for 0 activation)
+MVMU_ver = "Digital_V2"
 
 ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 num_bits = 16

diff --git a/include/example-configs/config-mlp.py b/include/example-configs/config-mlp.py
@@ -7,6 +7,13 @@
 xbar_record = 1
 inference = 1
 training = not(inference)
+sparse_opt = 1 # Flag for sparsity optimisation (set it to 0 for dense-only computations)
+
+## Variable to define the type of MVMU
+# One of "Analog", "Digital_V1" or "Digital_V2" 
+# Digital_V1 has compressed inputs (Data+Offset style)
+# Digital_V2 has uncompressed inputs (Skips computations for 0 activation)
+MVMU_ver = "Digital_V2"
 
 ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 num_bits = 16