# Programs/obliv_nn_pruned_resnet/obliv_nn_pruned_resnet.mpc

from Compiler import matrix_lib
from Compiler import folding_lib
from Compiler import offline_triple_lib
from Compiler import relu_parallel_lib
from Compiler.library import print_ln
from Compiler.instructions import start_clock, stop_clock

#
# Version with 1 truncation after each multiplication => no scaling.
# May be required for implementations with high precision (f=20)
#
def convolutional_block(X, K, Biases, Gamma, Beta, pooling, h, w, l, s, s_, kfh, kfw, debug=0):
    """Run one convolutional block: convolution, bias addition, batch
    normalization, ReLU and (optionally) average pooling.

    CONVOLUTION:
        h      -> feature height
        w      -> feature width
        l      -> padding
        (2l+1) -> kernel height
        (2l+1) -> kernel width
        s      -> input channels
        s_     -> output channels

    FOLDING:
        kfh    -> folding block height
        kfw    -> folding block width

    OTHER:
        X, K        -> input features and kernels, passed straight to
                       matrix_lib.conv3d_sfix2sfix
        Biases      -> added to the convolution output
        Gamma, Beta -> passed to folding_lib.batch_normalization
        pooling     -> the string "yes" enables the pooling step; any other
                       value skips it
        debug       -> when equal to 1, the intermediate results are
                       returned and the pooling step is never executed

    Returns:
        [X_biased, X_normalized, X_activated] when debug == 1;
        otherwise the folded (pooled) output when pooling == "yes";
        otherwise the activated output.

    Clock ids used: 10 (convolution), 11 (batch normalization),
    12 (activation), 14 (pooling).
    """
    # The kernel is square; its side is derived from the padding.
    kernel_side = (2 * l) + 1
    conv_stride = 1

    # CONV. TRIPLE GENERATION
    conv_triple_type = offline_triple_lib.NNTripleType(
        0, w, h, s, kernel_side, kernel_side, s_, conv_stride, l)

    print_ln("..... executing convolution - version conv3d_sfix2sfix")
    start_clock(10)
    X_convolved = matrix_lib.conv3d_sfix2sfix(
        X, K, conv_triple_type, kernel_side, kernel_side, s, s_, h, w, conv_stride, l)
    stop_clock(10)

    print_ln("..... adding biases")
    X_biased = matrix_lib.add_matrices(X_convolved, Biases)

    print_ln("..... executing batch normalization")
    start_clock(11)
    X_normalized = folding_lib.batch_normalization(X_biased, Gamma, Beta)
    stop_clock(11)

    print_ln("..... executing activation")
    start_clock(12)
    X_activated = relu_parallel_lib.relu_2d_parallel(X_normalized)
    stop_clock(12)

    # Debug mode exposes the intermediate stages and stops before pooling.
    if debug == 1:
        return [X_biased, X_normalized, X_activated]

    if pooling != "yes":
        return X_activated

    pool_stride = 2
    print_ln("..... executing average polling")
    start_clock(14)
    X_folded = folding_lib.folding(X_activated, kfh, kfw, pool_stride, "avg_pool", h, w)
    stop_clock(14)
    return X_folded


#
# Version with batched truncations.
# May require implementations with lower precision (e.g. 3 batched truncations require f=12)
#
def convolutional_block_fast(X, K, Biases, Gamma, Beta, pooling, h, w, l, s, s_, kfh, kfw, scaling=0):
    """Run one convolutional block with truncation switched off in the
    intermediate steps and a single combined truncation + ReLU at the end.

    CONVOLUTION:
        h      -> feature height
        w      -> feature width
        l      -> padding
        (2l+1) -> kernel height
        (2l+1) -> kernel width
        s      -> input channels
        s_     -> output channels

    FOLDING:
        kfh    -> folding block height
        kfw    -> folding block width

    OTHER:
        X, K        -> input features and kernels, passed straight to
                       matrix_lib.conv3d_sfix2sfix
        Biases      -> scaled by (scaling + 1) before being added
        Gamma, Beta -> batch normalization parameters; Beta is scaled by
                       (scaling + 2) before use
        pooling     -> the string "yes" enables the pooling step; any other
                       value skips it
        scaling     -> scale level carried by the input X (callers in this
                       file pass 1 when the previous block ended with
                       pooling, and leave the default 0 otherwise)

    Returns:
        [X_folded, X_activated] (a two-item list) when pooling == "yes";
        otherwise X_activated alone. Callers must unpack accordingly.

    Clock ids used: 10 (convolution), 11 (batch normalization),
    12 (activation), 14 (pooling).
    """
    trunc_mode = matrix_lib.Trunc_Mode.OFF
    conv_stride = 1
    # The kernel is square; its side is derived from the padding.
    kernel_side = (2 * l) + 1

    conv_triple_type = offline_triple_lib.NNTripleType(
        0, w, h, s, kernel_side, kernel_side, s_, conv_stride, l)

    print_ln("..... executing convolution - version conv3d_sfix2sfix")
    start_clock(10)
    # One extra scale level for the untruncated convolution.
    bias_scale = scaling + 1
    X_convolved = matrix_lib.conv3d_sfix2sfix(
        X, K, conv_triple_type, kernel_side, kernel_side, s, s_, h, w,
        conv_stride, l, trunc_mode)
    stop_clock(10)

    print_ln("..... adding biases")
    # Bring the biases to the scale level of the convolution output
    # before adding them.
    scaled_biases = matrix_lib.scale_matrix(Biases, bias_scale)
    X_biased = matrix_lib.add_matrices(X_convolved, scaled_biases)

    print_ln("..... executing batch normalization")
    start_clock(11)
    # One more scale level for the untruncated multiplication by gamma.
    norm_scale = bias_scale + 1
    scaled_beta = matrix_lib.scale_matrix(Beta, norm_scale)
    X_normalized = folding_lib.batch_normalization(X_biased, Gamma, scaled_beta, trunc_mode)
    stop_clock(11)

    print_ln("..... executing activation")
    start_clock(12)
    # The accumulated scale level is handed to the combined
    # truncation + ReLU helper.
    X_activated = matrix_lib.truncate_sfix_matrix_plus_ReLU(X_normalized, norm_scale)
    stop_clock(12)

    if pooling != "yes":
        return X_activated

    pool_stride = 2
    pool_padding = 0
    print_ln("..... executing average polling")
    start_clock(14)
    # Pooling also runs with truncation off; callers in this file account
    # for that by passing scaling=1 to the block that consumes X_folded.
    X_folded = folding_lib.folding(
        X_activated, kfh, kfw, pool_stride, "avg_pool", h, w, pool_padding, trunc_mode)
    stop_clock(14)
    return [X_folded, X_activated]

# X is a 3D feature matrix
# Y is an array of 3D kernels - 1 item per layer
# Biases contain the bias for linear transformations - 1 per neuron
# Gamma & Beta are parameters for normalization - 1 per dimension
# Triple is an array of tuples with conv triples - 1 item per layer

def oblivious_pruned_resnet(X, Y, Biases, Gamma, Beta):
    """Evaluate the pruned ResNet: six convolutional blocks followed by two
    fully connected layers.

    Parameters:
        X      -> 3D input features (rearranged into a 2D matrix here)
        Y      -> per-layer weights: Y[0..5] are the 4D kernels of the
                  convolutional layers, Y[6] and Y[7] the weight matrices
                  of the fully connected layers
        Biases -> Biases[0..5] for the convolutional layers,
                  Biases[6], Biases[7] for the fully connected layers
        Gamma  -> Gamma[0..5], batch normalization parameters
        Beta   -> Beta[0..5], batch normalization parameters

    Returns:
        A list with one entry per layer (8 entries). For convolutional
        layers that end with pooling (layers 2, 4 and 6) the entry is the
        activation output *before* pooling; the pooled output is only used
        internally as the input of the next layer.

    Clock ids used: 99 (all convolutional layers), 16 (both FC layers).
    """
    #####  INPUT FEATURES
    features = matrix_lib.rearrange_3d_features_into_2d_matrix(X)

    #####  KERNELS for CONVOLUTIONAL LAYER
    kernels = [matrix_lib.rearrange_4d_kernels_into_2d_matrix(Y[idx]) for idx in range(6)]

    # scale_up = 1 if previous layer has pooling / scale_up = 0 otherwise
    scale_up = 1

    #####  CONVOLUTIONAL LAYERS
    # One row per layer:
    #   pooling, h, w, l, s, s_, kfh, kfw, scaling, completion message
    conv_specs = [
        ("no",  32, 32, 1,   3,  32, 0, 0, 0,
         "----- executing pResNet - LAYER 1 completed - Conv -----"),
        ("yes", 32, 32, 1,  32,  64, 2, 2, 0,
         "----- executing pResNet - LAYER 2 completed - Conv -----"),
        ("no",  16, 16, 1,  64, 128, 0, 0, scale_up,
         "----- executing pResNet - LAYER 3 completed - Conv -----"),
        ("yes", 16, 16, 1, 128, 128, 2, 2, 0,
         "----- executing pResNet - LAYER 4 completed - Conv -----"),
        ("no",   8,  8, 1, 128, 256, 0, 0, scale_up,
         "----- executing pResNet - LAYER 5 completed - Conv -----"),
        ("yes",  8,  8, 1, 256, 256, 2, 2, 0,
         "----- executing pResNet - LAYER 6 completed - Conv -----"),
    ]

    results = []
    layer_input = features

    start_clock(99)
    for idx, spec in enumerate(conv_specs):
        pooling, h, w, l, s, s_, kfh, kfw, scaling, done_msg = spec
        block_out = convolutional_block_fast(
            layer_input, kernels[idx], Biases[idx], Gamma[idx], Beta[idx],
            pooling, h, w, l, s, s_, kfh, kfw, scaling)
        if pooling == "yes":
            # With pooling the block returns [pooled, activated]: the pooled
            # output feeds the next layer, the activated one is reported.
            layer_input, activated = block_out
            results.append(activated)
        else:
            layer_input = block_out
            results.append(block_out)
        print_ln(done_msg)
    stop_clock(99)

    #####  NEURON WEIGHTS for FC LAYERS
    fc1_weights = Y[6]
    fc2_weights = Y[7]

    fc1_rows = len(fc1_weights)
    fc1_cols = len(fc1_weights[0])
    fc1_triple_type = offline_triple_lib.TripleType(0, 1, fc1_rows, fc1_rows, fc1_cols, 1, fc1_cols)

    start_clock(16)
    flattened = matrix_lib.flatten_to_rowmatrix(layer_input)
    fc1_product = matrix_lib.multmat_sfix2sfix(
        flattened, fc1_weights, fc1_triple_type, matrix_lib.Trunc_Mode.OFF)
    fc1_scaling = 2  # [ previous folding + multmat ]
    fc1_biases = matrix_lib.scale_matrix(Biases[6], fc1_scaling)
    fc1_biased = matrix_lib.add_matrices(fc1_product, fc1_biases)
    fc1_out = matrix_lib.truncate_sfix_matrix_plus_ReLU(fc1_biased, fc1_scaling)
    print_ln("----- executing pResNet - LAYER 7 completed - FC -----")

    fc2_rows = len(fc2_weights)
    fc2_cols = len(fc2_weights[0])
    fc2_triple_type = offline_triple_lib.TripleType(0, 1, fc2_rows, fc2_rows, fc2_cols, 1, fc2_cols)

    # Last multiplication runs with truncation on, so its biases are added
    # unscaled.
    fc2_product = matrix_lib.multmat_sfix2sfix(
        fc1_out, fc2_weights, fc2_triple_type, matrix_lib.Trunc_Mode.ON)
    fc2_out = matrix_lib.add_matrices(fc2_product, Biases[7])
    print_ln("----- executing pResNet - LAYER 8 completed - FC -----")
    stop_clock(16)

    ### SOFTMAX??
    ### output = folding_lib.softmax_scaled(X9_layer, -15, True)
    ### (X9_layer above corresponds to fc2_out)

    results.append(fc1_out)
    results.append(fc2_out)

    return results