diff --git a/ADBench/run-all.ps1 b/ADBench/run-all.ps1 old mode 100644 new mode 100755 index 4893cc24..c97384d7 --- a/ADBench/run-all.ps1 +++ b/ADBench/run-all.ps1 @@ -29,52 +29,52 @@ https://github.com/awf/ADBench #> -param(# Which build to test. +param(# Which build to test. # Builds should leave a script file 'cmake-vars-$buildtype.ps1' in the ADBench directory. # which sets $bindir to the build directory, # And if only some D,K are valid for GMM, sets $gmm_d_vals, and $gmm_k_vals [string]$buildtype="Release", - - # Estimated time of accurate result achievement. - # A runner cyclically reruns measured function until total time becomes more than that value. + + # Estimated time of accurate result achievement. + # A runner cyclically reruns measured function until total time becomes more than that value. # Supported only by the benchmark runner-based tools # (those with ToolType cpp, dotnet, julia, or python). [double]$minimum_measurable_time = 0.5, - # Maximum number of times to run the function for timing - [int]$nruns_f=10, - + # Maximum number of times to run the function for timing + [int]$nruns_f=10, + # Maximum number of times to run the jacobian for timing - [int]$nruns_J=10, - + [int]$nruns_J=10, + # How many seconds to wait before we believe we have accurate timings - [double]$time_limit=10, - - # Kill the test after this many seconds + [double]$time_limit=10, + + # Kill the test after this many seconds [double]$timeout=300, # Kill the test if it consumes more than this many gigabytes of RAM. [double]$max_memory_amount_in_gb=[double]::PositiveInfinity, - - # Where to store the ouput, defaults to tmp/ in the project root + + # Where to store the ouput, defaults to tmp/ in the project root [string]$tmpdir="", - + # Repeat tests, even if output file exists [switch]$repeat, - + # Repeat only failed tests [switch]$repeat_failures, - + # List of tools to run [string[]]$tools=@(), - + # Don't delete produced jacobians even if they're accurate - [switch]$keep_correct_jacobians, - + [switch]$keep_correct_jacobians, + # GMM D values to try. Must be a subset of the list of - # compiled values in ADBench/cmake-vars-$buildtype.ps1 + # compiled values in ADBench/cmake-vars-$buildtype.ps1 [int[]]$gmm_d_vals_param, - + # GMM K values to run. As above. [int[]]$gmm_k_vals_param, @@ -209,7 +209,7 @@ function run_command ($indent, $outfile, $timeout, $cmd) { $mem = [math]::round($mem / (1024 * 1024 * 1024), 2) Write-Host "${indent}Killed due to consuming $mem GB of operating memory" Store-NonFatalError "Process killed due to consuming $mem GB of operating memory`n[$cmd $args]" - + $status = [RunCommandStatus]::OutOfMemory break } @@ -327,16 +327,16 @@ Class Tool { This will create a Tool: - called "Finite" - run from binary executables - - runs all four tests + - runs all four tests - does not do GMM in separate FULL and SPLIT modes - doesn't require separate executables for different GMM sizes - does not check the correctness of the computed jacobians .NOTES - $objectives is an enumerable variable, + $objectives is an enumerable variable, where each flag determines whether to run a certain objective: GMM, BA, HAND, LSTM - + #> $this.name = $name @@ -358,16 +358,16 @@ Class Tool { This will create a Tool: - called "Finite" - run from binary executables - - runs all four tests + - runs all four tests - does not do GMM in separate FULL and SPLIT modes - doesn't require separate executables for different GMM sizes - does not check the correctness of the computed jacobians .NOTES - $objectives is an enumerable variable, + $objectives is an enumerable variable, where each flag determines whether to run a certain objective: GMM, BA, HAND, LSTM - + #> $this.name = $name @@ -477,11 +477,11 @@ Class Tool { if ($run_command_status -eq [RunCommandStatus]::OutOfMemory) { $status = [RunTestStatus]::OutOfMemory } - + return $status } - # Get postfix for tool output file name + # Get postfix for tool output file name [string] get_out_name_postfix([string]$objective) { $postfix = $this.name @@ -697,7 +697,10 @@ $tool_descriptors = @( [Tool]::new("ManualEigen", "cpp", [ObjectiveType] "GMM, BA, Hand, LSTM", $true, $default_tolerance) [Tool]::new("ManualEigenVector", "cpp", [ObjectiveType] "GMM", $true, $default_tolerance) [Tool]::new("DiffSharpModule", "dotnet", [ObjectiveType] "GMM, BA, Hand, LSTM", $true, $default_tolerance) - [Tool]::new("Tapenade", "cpp", [ObjectiveType] "BA, LSTM, GMM, Hand", $true, $default_tolerance) + [Tool]::new("Tapenade", "cpp", [ObjectiveType] "GMM, BA", $true, $default_tolerance) + [Tool]::new("Clad", "cpp", [ObjectiveType] "GMM, BA", $true, $default_tolerance) + [Tool]::new("CladLatest", "cpp", [ObjectiveType] "GMM, BA", $true, $default_tolerance) + [Tool]::new("CladLatestTBR", "cpp", [ObjectiveType] "GMM, BA", $true, $default_tolerance) [Tool]::new("PyTorch", "python", [ObjectiveType] "BA, LSTM, GMM, Hand", $true, 1e-7) [Tool]::new("TorchScript", "python", [ObjectiveType] "GMM", $true, 1e-7) [Tool]::new("Tensorflow", "python", [ObjectiveType] "BA, LSTM, GMM, Hand", $true, $default_tolerance) diff --git a/src/cpp/modules/CMakeLists.txt b/src/cpp/modules/CMakeLists.txt index eecc1f8c..9a58864f 100644 --- a/src/cpp/modules/CMakeLists.txt +++ b/src/cpp/modules/CMakeLists.txt @@ -1,5 +1,8 @@ project("ADBench_Modules") +# add_subdirectory ("clad") +add_subdirectory ("cladLatest") +add_subdirectory ("cladLatestTBR") add_subdirectory ("manual") add_subdirectory ("manualEigen") add_subdirectory ("manualEigenVector") diff --git a/src/cpp/modules/cladLatest/CMakeLists.txt b/src/cpp/modules/cladLatest/CMakeLists.txt new file mode 100644 index 00000000..eb70843d --- /dev/null +++ b/src/cpp/modules/cladLatest/CMakeLists.txt @@ -0,0 +1,15 @@ +project("ADBench_CladLatest" CXX) + +add_library("CladLatest" MODULE) + +SET(clad_install_root "/home/mad-scientist/clubs/open-source/clad/formal/builds/build-llvm-16/inst") + +target_sources("CladLatest" PRIVATE "${CMAKE_SOURCE_DIR}/src/cpp/shared/utils.cpp") +target_sources("CladLatest" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/ba/ba_grad.cpp") +target_sources("CladLatest" PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/CladLatestBA.cpp") +target_sources("CladLatest" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/gmm/gmm_grad.cpp") +target_sources("CladLatest" PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/CladLatestGMM.cpp") + +target_include_directories("CladLatest" PRIVATE ${clad_install_root}/include) +target_include_directories("CladLatest" PRIVATE ${CMAKE_SOURCE_DIR}/src/cpp/shared) +# add_compile_definitions("CladLatest" CLAD_NO_NUM_DIFF) \ No newline at end of file diff --git a/src/cpp/modules/cladLatest/CladLatestBA.cpp b/src/cpp/modules/cladLatest/CladLatestBA.cpp new file mode 100644 index 00000000..f5b06174 --- /dev/null +++ b/src/cpp/modules/cladLatest/CladLatestBA.cpp @@ -0,0 +1,115 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#include "CladLatestBA.h" +#include "ba/ba_grad.h" + +// This function must be called before any other function. +void CladLatestBA::prepare(BAInput &&input) { + this->input = input; + result = {std::vector(2 * this->input.p), + std::vector(this->input.p), + BASparseMat(this->input.n, this->input.m, this->input.p)}; + + reproj_err_d = std::vector(2 * (BA_NCAMPARAMS + 3 + 1)); + reproj_err_d_row = std::vector(BA_NCAMPARAMS + 3 + 1); +} + +BAOutput CladLatestBA::output() { return result; } + +void CladLatestBA::calculate_objective(int times) { + for (int i = 0; i < times; i++) { + ba_objective(input.n, input.m, input.p, input.cams.data(), input.X.data(), + input.w.data(), input.obs.data(), input.feats.data(), + result.reproj_err.data(), result.w_err.data()); + } +} + +void CladLatestBA::calculate_jacobian(int times) { + for (int i = 0; i < times; i++) { + result.J.clear(); + calculate_reproj_error_jacobian_part(); + calculate_weight_error_jacobian_part(); + } +} + +void CladLatestBA::calculate_reproj_error_jacobian_part() { + double + errb[2]; // stores dY + // (i-th element equals to 1.0 for calculating i-th jacobian row) + + double err[2]; // stores fictive result + // (Tapenade doesn't calculate an original function in + // reverse mode) + + clad::array_ref cam_gradient_part(reproj_err_d_row.data(), + reproj_err_d_row.size()); + clad::array_ref x_gradient_part( + reproj_err_d_row.data() + BA_NCAMPARAMS, + reproj_err_d_row.size() - BA_NCAMPARAMS); + clad::array_ref weight_gradient_part( + reproj_err_d_row.data() + BA_NCAMPARAMS + 3, + reproj_err_d_row.size() - BA_NCAMPARAMS - 3); + std::vector feats_gradient_part(2, 0); + clad::array_ref feats_gradient_part_ref(feats_gradient_part.data(), + feats_gradient_part.size()); + clad::array_ref errb_ref(errb, 2); + + for (int i = 0; i < input.p; i++) { + std::fill(reproj_err_d_row.begin(), reproj_err_d_row.end(), 0); + std::fill(feats_gradient_part.begin(), feats_gradient_part.end(), 0); + + int camIdx = input.obs[2 * i + 0]; + int ptIdx = input.obs[2 * i + 1]; + + // calculate first row + errb[0] = 1.0; + errb[1] = 0.0; + computeReprojError_pullback( + &input.cams[camIdx * BA_NCAMPARAMS], &input.X[ptIdx * 3], &input.w[i], + &input.feats[i * 2], err, cam_gradient_part, x_gradient_part, + weight_gradient_part, feats_gradient_part_ref, errb_ref); + + // fill first row elements + for (int j = 0; j < BA_NCAMPARAMS + 3 + 1; j++) { + reproj_err_d[2 * j] = reproj_err_d_row[j]; + } + + std::fill(reproj_err_d_row.begin(), reproj_err_d_row.end(), 0); + std::fill(feats_gradient_part.begin(), feats_gradient_part.end(), 0); + // calculate second row + errb[0] = 0.0; + errb[1] = 1.0; + computeReprojError_pullback( + &input.cams[camIdx * BA_NCAMPARAMS], &input.X[ptIdx * 3], &input.w[i], + &input.feats[i * 2], err, cam_gradient_part, x_gradient_part, + weight_gradient_part, feats_gradient_part_ref, errb); + + // fill second row elements + for (int j = 0; j < BA_NCAMPARAMS + 3 + 1; j++) { + reproj_err_d[2 * j + 1] = reproj_err_d_row[j]; + } + + result.J.insert_reproj_err_block(i, camIdx, ptIdx, reproj_err_d.data()); + } +} + +void CladLatestBA::calculate_weight_error_jacobian_part() { + for (int j = 0; j < input.p; j++) { + double wb = 0; // stores calculated derivative + + double err = 0.0; // stores fictive result + // (Tapenade doesn't calculate an original function in + // reverse mode) + + double errb = 1.0; // stores dY + // (equals to 1.0 for derivative calculation) + + computeZachWeightError_pullback(&input.w[j], &err, &wb, &errb); + result.J.insert_w_err_block(j, wb); + } +} + +extern "C" DLL_PUBLIC ITest *get_ba_test() { + return new CladLatestBA(); +} \ No newline at end of file diff --git a/src/cpp/modules/cladLatest/CladLatestBA.h b/src/cpp/modules/cladLatest/CladLatestBA.h new file mode 100644 index 00000000..f3d4e133 --- /dev/null +++ b/src/cpp/modules/cladLatest/CladLatestBA.h @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +#include "../../shared/ITest.h" +#include "../../shared/BAData.h" +#include "../../shared/defs.h" + +#include "ba/ba.h" + +#include + +class CladLatestBA : public ITest +{ +private: + BAInput input; + BAOutput result; + std::vector state; + + // buffer for reprojection error jacobian part holding (column-major) + std::vector reproj_err_d; + + // buffer for reprojection error jacobian block row holding + std::vector reproj_err_d_row; +public: + // This function must be called before any other function. + virtual void prepare(BAInput&& input) override; + + virtual void calculate_objective(int times) override; + virtual void calculate_jacobian(int times) override; + virtual BAOutput output() override; + + ~CladLatestBA() {} + +private: + void calculate_weight_error_jacobian_part(); + void calculate_reproj_error_jacobian_part(); +}; \ No newline at end of file diff --git a/src/cpp/modules/cladLatest/CladLatestGMM.cpp b/src/cpp/modules/cladLatest/CladLatestGMM.cpp new file mode 100644 index 00000000..c5d20b0f --- /dev/null +++ b/src/cpp/modules/cladLatest/CladLatestGMM.cpp @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#include "CladLatestGMM.h" + +#include "gmm/gmm.h" +#include "gmm/gmm_grad.h" + +// This function must be called before any other function. +void CladLatestGMM::prepare(GMMInput&& input) +{ + this->input = input; + int Jcols = (this->input.k * (this->input.d + 1) * (this->input.d + 2)) / 2; + result = { 0, std::vector(Jcols) }; +} + + + +GMMOutput CladLatestGMM::output() +{ + return result; +} + + + +void CladLatestGMM::calculate_objective(int times) +{ + for (int i = 0; i < times; i++) + { + gmm_objective( + input.d, + input.k, + input.n, + input.alphas.data(), + input.means.data(), + input.icf.data(), + input.x.data(), + input.wishart, + &result.objective + ); + } +} + + + +void CladLatestGMM::calculate_jacobian(int times) +{ + double* alphas_gradient_part = result.gradient.data(); + double* means_gradient_part = result.gradient.data() + input.alphas.size(); + double* icf_gradient_part = + result.gradient.data() + + input.alphas.size() + + input.means.size(); + std::vector d_x(input.x.size(), 0); + + for (int i = 0; i < times; i++) + { + double tmp = 0.0; // stores fictive result + // (Tapenade doesn't calculate an original function in reverse mode) + + double errb = 1.0; // stores dY + // (equals to 1.0 for gradient calculation) + + int d_d = 0, d_k = 0, d_n = 0; + Wishart d_wishart{0, 0}; + std::fill(result.gradient.begin(), result.gradient.end(), 0); + std::fill(d_x.begin(), d_x.end(), 0); + + gmm_objective_pullback( + input.d, + input.k, + input.n, + input.alphas.data(), + input.means.data(), + input.icf.data(), + input.x.data(), + input.wishart, + &tmp, + &d_d, + &d_k, + &d_n, + alphas_gradient_part, + means_gradient_part, + icf_gradient_part, + d_x.data(), + &d_wishart, + &errb + ); + } +} + + + +extern "C" DLL_PUBLIC ITest* get_gmm_test() +{ + return new CladLatestGMM(); +} \ No newline at end of file diff --git a/src/cpp/modules/cladLatest/CladLatestGMM.h b/src/cpp/modules/cladLatest/CladLatestGMM.h new file mode 100644 index 00000000..7d276dac --- /dev/null +++ b/src/cpp/modules/cladLatest/CladLatestGMM.h @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +#include "../../shared/ITest.h" +#include "../../shared/GMMData.h" + +#include + +class CladLatestGMM : public ITest +{ +private: + GMMInput input; + GMMOutput result; + std::vector state; + +public: + // This function must be called before any other function. + virtual void prepare(GMMInput&& input) override; + + virtual void calculate_objective(int times) override; + virtual void calculate_jacobian(int times) override; + virtual GMMOutput output() override; + + ~CladLatestGMM() {} +}; + diff --git a/src/cpp/modules/cladLatest/ba/ba.h b/src/cpp/modules/cladLatest/ba/ba.h new file mode 100644 index 00000000..937a686f --- /dev/null +++ b/src/cpp/modules/cladLatest/ba/ba.h @@ -0,0 +1,192 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. +#ifndef CLADLATEST_BA_H +#define CLADLATEST_BA_H +#include +#include +#include + +#include "defs.h" +#include "matrix.h" + +//////////////////////////////////////////////////////////// +//////////////////// Declarations ////////////////////////// +//////////////////////////////////////////////////////////// + +// cam: 11 camera in format [r1 r2 r3 C1 C2 C3 f u0 v0 k1 k2] +// r1, r2, r3 are angle - axis rotation parameters(Rodrigues) +// [C1 C2 C3]' is the camera center +// f is the focal length in pixels +// [u0 v0]' is the principal point +// k1, k2 are radial distortion parameters +// X: 3 point +// feats: 2 feature (x,y coordinates) +// reproj_err: 2 +// projection function: +// Xcam = R * (X - C) +// distorted = radial_distort(projective2euclidean(Xcam), radial_parameters) +// proj = distorted * f + principal_point +// err = sqsum(proj - measurement) +template +void computeReprojError(const T *const cam, const T *const X, const T *const w, + const double *const feat, T *err); + +// w: 1 +// w_err: 1 +template void computeZachWeightError(const T *const w, T *err); + +// n number of cameras +// m number of points +// p number of observations +// cams: 11*n cameras in format [r1 r2 r3 C1 C2 C3 f u0 v0 k1 k2] +// r1, r2, r3 are angle - axis rotation parameters(Rodrigues) +// [C1 C2 C3]' is the camera center +// f is the focal length in pixels +// [u0 v0]' is the principal point +// k1, k2 are radial distortion parameters +// X: 3*m points +// obs: 2*p observations (pairs cameraIdx, pointIdx) +// feats: 2*p features (x,y coordinates corresponding to observations) +// reproj_err: 2*p errors of observations +// w_err: p weight "error" terms +// projection function: +// Xcam = R * (X - C) +// distorted = radial_distort(projective2euclidean(Xcam), radial_parameters) +// proj = distorted * f + principal_point +// err = sqsum(proj - measurement) +template +void ba_objective(int n, int m, int p, const T *const cams, const T *const X, + const T *const w, const int *const obs, + const double *const feats, T *reproj_err, T *w_err); + +// rot: 3 rotation parameters +// pt: 3 point to be rotated +// rotatedPt: 3 rotated point +// this is an efficient evaluation (part of +// the Ceres implementation) +// easy to understand calculation in matlab: +// theta = sqrt(sum(w. ^ 2)); +// n = w / theta; +// n_x = au_cross_matrix(n); +// R = eye(3) + n_x*sin(theta) + n_x*n_x*(1 - cos(theta)); +template +void rodrigues_rotate_point(const T *const rot, const T *const pt, + T *rotatedPt); + +//////////////////////////////////////////////////////////// +//////////////////// Definitions /////////////////////////// +//////////////////////////////////////////////////////////// + +template T sqsum(int n, const T *const x) { + T res = 0; + int i = 0; + for (; i < n; i++) + res = res + x[i] * x[i]; + return res; +} + +template +void rodrigues_rotate_point(const T * rot, const T * pt, + T *rotatedPt) { + T sqtheta = sqsum(3, rot); + int i0 = 0; + int i1 = 0; + int i2 = 0; + T theta, costheta, sintheta, theta_inverse, tmp; + T w[3], w_cross_pt[3]; + T rot_cross_pt[3]; + if (sqtheta != 0) { + theta = sqrt(sqtheta); + costheta = cos(theta); + sintheta = sin(theta); + theta_inverse = 1.0 / theta; + + for (; i0 < 3; i0++) + w[i0] = rot[i0] * theta_inverse; + + cross(w, pt, w_cross_pt); + + tmp = (w[0] * pt[0] + w[1] * pt[1] + w[2] * pt[2]) * (1. - costheta); + + for (; i1 < 3; i1++) + rotatedPt[i1] = pt[i1] * costheta + w_cross_pt[i1] * sintheta + w[i1] * tmp; + } else { + cross(rot, pt, rot_cross_pt); + + for (; i2 < 3; i2++) + rotatedPt[i2] = pt[i2] + rot_cross_pt[i2]; + } +} + +template void radial_distort(const T *const rad_params, T *proj) { + T rsq, L; + rsq = sqsum(2, proj); + L = 1. + rad_params[0] * rsq + rad_params[1] * rsq * rsq; + proj[0] = proj[0] * L; + proj[1] = proj[1] * L; +} + +template +void project(const T * cam, const T * X, T *proj) { + const T *const C = &cam[3]; + T Xo[3], Xcam[3]; + + Xo[0] = X[0] - C[0]; + Xo[1] = X[1] - C[1]; + Xo[2] = X[2] - C[2]; + + rodrigues_rotate_point(cam + 0, Xo, Xcam); + + proj[0] = Xcam[0] / Xcam[2]; + proj[1] = Xcam[1] / Xcam[2]; + + radial_distort(cam + 9, proj); + + proj[0] = proj[0] * cam[6] + cam[7]; + proj[1] = proj[1] * cam[6] + cam[8]; +} + +template +void computeReprojError(const T * cam, const T * X, const T * w, + const double * feat, T *err) { + T proj[2]; + project(cam, X, proj); + + err[0] = (*w) * (proj[0] - feat[0]); + err[1] = (*w) * (proj[1] - feat[1]); +} + +template void computeZachWeightError(const T *const w, T *err) { + *err = 1 - (*w) * (*w); +} + +template +void ba_objective(int n, int m, int p, const T *const cams, const T *const X, + const T *const w, const int *const obs, + const double *const feats, T *reproj_err, T *w_err) { + for (int i = 0; i < p; i++) { + int camIdx = obs[i * 2 + 0]; + int ptIdx = obs[i * 2 + 1]; + computeReprojError(&cams[camIdx * BA_NCAMPARAMS], &X[ptIdx * 3], &w[i], + &feats[i * 2], &reproj_err[2 * i]); + } + + for (int i = 0; i < p; i++) { + computeZachWeightError(&w[i], &w_err[i]); + } +} + +template +T computeReprojError_wrapper(const T * cam, const T * X, + const T * w, const double * feat, + T *err) { + computeReprojError(cam, X, w, feat, err); + return err[0]; +} + +template +T computeZachWeightError_wrapper(const T * w, T *err) { + computeZachWeightError(w, err); + return err[0]; +} +#endif \ No newline at end of file diff --git a/src/cpp/modules/cladLatest/ba/ba_grad.cpp b/src/cpp/modules/cladLatest/ba/ba_grad.cpp new file mode 100644 index 00000000..3f9a7772 --- /dev/null +++ b/src/cpp/modules/cladLatest/ba/ba_grad.cpp @@ -0,0 +1,519 @@ +#include "ba.h" +#include "clad/Differentiator/Differentiator.h" + +void sqsum_pullback(int n, const double *const x, double _d_y, + clad::array_ref _d_n, clad::array_ref _d_x) { + double _d_res = 0; + int _d_i = 0; + unsigned long _t0; + clad::tape _t1 = {}; + double res = 0; + int i = 0; + _t0 = 0; + for (; i < n; i++) { + _t0++; + clad::push(_t1, res); + res = res + x[i] * x[i]; + } + goto _label0; +_label0: + _d_res += _d_y; + for (; _t0; _t0--) { + i--; + res = clad::pop(_t1); + double _r_d0 = _d_res; + _d_res += _r_d0; + _d_x[i] += _r_d0 * x[i]; + _d_x[i] += x[i] * _r_d0; + _d_res -= _r_d0; + } +} +void cross_pullback(const double *const a, const double *const b, double *out, + clad::array_ref _d_a, clad::array_ref _d_b, + clad::array_ref _d_out) { + double _t0; + double _t1; + double _t2; + _t0 = out[0]; + out[0] = a[1] * b[2] - a[2] * b[1]; + _t1 = out[1]; + out[1] = a[2] * b[0] - a[0] * b[2]; + _t2 = out[2]; + out[2] = a[0] * b[1] - a[1] * b[0]; + { + out[2] = _t2; + double _r_d2 = _d_out[2]; + _d_a[0] += _r_d2 * b[1]; + _d_b[1] += a[0] * _r_d2; + _d_a[1] += -_r_d2 * b[0]; + _d_b[0] += a[1] * -_r_d2; + _d_out[2] -= _r_d2; + _d_out[2]; + } + { + out[1] = _t1; + double _r_d1 = _d_out[1]; + _d_a[2] += _r_d1 * b[0]; + _d_b[0] += a[2] * _r_d1; + _d_a[0] += -_r_d1 * b[2]; + _d_b[2] += a[0] * -_r_d1; + _d_out[1] -= _r_d1; + _d_out[1]; + } + { + out[0] = _t0; + double _r_d0 = _d_out[0]; + _d_a[1] += _r_d0 * b[2]; + _d_b[2] += a[1] * _r_d0; + _d_a[2] += -_r_d0 * b[1]; + _d_b[1] += a[2] * -_r_d0; + _d_out[0] -= _r_d0; + _d_out[0]; + } +} +void rodrigues_rotate_point_pullback(const double *rot, const double *pt, + double *rotatedPt, + clad::array_ref _d_rot, + clad::array_ref _d_pt, + clad::array_ref _d_rotatedPt) { + const double *_t0; + double _d_sqtheta = 0; + int _d_i0 = 0; + int _d_i1 = 0; + int _d_i2 = 0; + double _d_theta = 0, _d_costheta = 0, _d_sintheta = 0, _d_theta_inverse = 0, + _d_tmp = 0; + clad::array _d_w(3UL), _d_w_cross_pt(3UL); + clad::array _d_rot_cross_pt(3UL); + bool _cond0; + double _t1; + double _t2; + double _t3; + double _t4; + unsigned long _t5; + clad::tape _t6 = {}; + const double *_t7; + double _t8; + unsigned long _t9; + clad::tape _t10 = {}; + const double *_t11; + const double *_t12; + unsigned long _t13; + clad::tape _t14 = {}; + _t0 = rot; + double sqtheta = sqsum(3, rot); + int i0 = 0; + int i1 = 0; + int i2 = 0; + double theta, costheta, sintheta, theta_inverse, tmp; + double w[3], w_cross_pt[3]; + double rot_cross_pt[3]; + _cond0 = sqtheta != 0; + if (_cond0) { + _t1 = theta; + theta = sqrt(sqtheta); + _t2 = costheta; + costheta = cos(theta); + _t3 = sintheta; + sintheta = sin(theta); + _t4 = theta_inverse; + theta_inverse = 1. / theta; + _t5 = 0; + for (; i0 < 3; i0++) { + _t5++; + clad::push(_t6, w[i0]); + w[i0] = rot[i0] * theta_inverse; + } + _t7 = pt; + cross(w, pt, w_cross_pt); + _t8 = tmp; + tmp = (w[0] * pt[0] + w[1] * pt[1] + w[2] * pt[2]) * (1. - costheta); + _t9 = 0; + for (; i1 < 3; i1++) { + _t9++; + clad::push(_t10, rotatedPt[i1]); + rotatedPt[i1] = + pt[i1] * costheta + w_cross_pt[i1] * sintheta + w[i1] * tmp; + } + } else { + _t11 = rot; + _t12 = pt; + cross(rot, pt, rot_cross_pt); + _t13 = 0; + for (; i2 < 3; i2++) { + _t13++; + clad::push(_t14, rotatedPt[i2]); + rotatedPt[i2] = pt[i2] + rot_cross_pt[i2]; + } + } + if (_cond0) { + for (; _t9; _t9--) { + i1--; + rotatedPt[i1] = clad::pop(_t10); + double _r_d6 = _d_rotatedPt[i1]; + _d_pt[i1] += _r_d6 * costheta; + _d_costheta += pt[i1] * _r_d6; + _d_w_cross_pt[i1] += _r_d6 * sintheta; + _d_sintheta += w_cross_pt[i1] * _r_d6; + _d_w[i1] += _r_d6 * tmp; + _d_tmp += w[i1] * _r_d6; + _d_rotatedPt[i1] -= _r_d6; + _d_rotatedPt[i1]; + } + { + tmp = _t8; + double _r_d5 = _d_tmp; + _d_w[0] += _r_d5 * (1. - costheta) * pt[0]; + _d_pt[0] += w[0] * _r_d5 * (1. - costheta); + _d_w[1] += _r_d5 * (1. - costheta) * pt[1]; + _d_pt[1] += w[1] * _r_d5 * (1. - costheta); + _d_w[2] += _r_d5 * (1. - costheta) * pt[2]; + _d_pt[2] += w[2] * _r_d5 * (1. - costheta); + _d_costheta += -(w[0] * pt[0] + w[1] * pt[1] + w[2] * pt[2]) * _r_d5; + _d_tmp -= _r_d5; + } + { + pt = _t7; + cross_pullback(w, _t7, w_cross_pt, _d_w, _d_pt, _d_w_cross_pt); + } + for (; _t5; _t5--) { + i0--; + w[i0] = clad::pop(_t6); + double _r_d4 = _d_w[i0]; + _d_rot[i0] += _r_d4 * theta_inverse; + _d_theta_inverse += rot[i0] * _r_d4; + _d_w[i0] -= _r_d4; + _d_w[i0]; + } + { + theta_inverse = _t4; + double _r_d3 = _d_theta_inverse; + double _r4 = _r_d3 * -1. / (theta * theta); + _d_theta += _r4; + _d_theta_inverse -= _r_d3; + } + { + sintheta = _t3; + double _r_d2 = _d_sintheta; + double _r3 = + _r_d2 * + clad::custom_derivatives::sin_pushforward(theta, 1.).pushforward; + _d_theta += _r3; + _d_sintheta -= _r_d2; + } + { + costheta = _t2; + double _r_d1 = _d_costheta; + double _r2 = + _r_d1 * + clad::custom_derivatives::cos_pushforward(theta, 1.).pushforward; + _d_theta += _r2; + _d_costheta -= _r_d1; + } + { + theta = _t1; + double _r_d0 = _d_theta; + double _r1 = + _r_d0 * + clad::custom_derivatives::sqrt_pushforward(sqtheta, 1.).pushforward; + _d_sqtheta += _r1; + _d_theta -= _r_d0; + } + } else { + for (; _t13; _t13--) { + i2--; + rotatedPt[i2] = clad::pop(_t14); + double _r_d7 = _d_rotatedPt[i2]; + _d_pt[i2] += _r_d7; + _d_rot_cross_pt[i2] += _r_d7; + _d_rotatedPt[i2] -= _r_d7; + _d_rotatedPt[i2]; + } + { + rot = _t11; + pt = _t12; + cross_pullback(_t11, _t12, rot_cross_pt, _d_rot, _d_pt, _d_rot_cross_pt); + } + } + { + rot = _t0; + int _grad0 = 0; + sqsum_pullback(3, _t0, _d_sqtheta, &_grad0, _d_rot); + int _r0 = _grad0; + } +} +void radial_distort_pullback(const double *const rad_params, double *proj, + clad::array_ref _d_rad_params, + clad::array_ref _d_proj) { + double _d_rsq = 0, _d_L = 0; + double _t0; + double *_t1; + double _t2; + double _t3; + double _t4; + double rsq, L; + _t0 = rsq; + _t1 = proj; + rsq = sqsum(2, proj); + _t2 = L; + L = 1. + rad_params[0] * rsq + rad_params[1] * rsq * rsq; + _t3 = proj[0]; + proj[0] = proj[0] * L; + _t4 = proj[1]; + proj[1] = proj[1] * L; + { + proj[1] = _t4; + double _r_d3 = _d_proj[1]; + _d_proj[1] += _r_d3 * L; + _d_L += proj[1] * _r_d3; + _d_proj[1] -= _r_d3; + _d_proj[1]; + } + { + proj[0] = _t3; + double _r_d2 = _d_proj[0]; + _d_proj[0] += _r_d2 * L; + _d_L += proj[0] * _r_d2; + _d_proj[0] -= _r_d2; + _d_proj[0]; + } + { + L = _t2; + double _r_d1 = _d_L; + _d_rad_params[0] += _r_d1 * rsq; + _d_rsq += rad_params[0] * _r_d1; + _d_rad_params[1] += _r_d1 * rsq * rsq; + _d_rsq += rad_params[1] * _r_d1 * rsq; + _d_rsq += rad_params[1] * rsq * _r_d1; + _d_L -= _r_d1; + } + { + rsq = _t0; + double _r_d0 = _d_rsq; + proj = _t1; + int _grad0 = 0; + sqsum_pullback(2, _t1, _r_d0, &_grad0, _d_proj); + int _r0 = _grad0; + _d_rsq -= _r_d0; + } +} +void project_pullback(const double *cam, const double *X, double *proj, + clad::array_ref _d_cam, + clad::array_ref _d_X, + clad::array_ref _d_proj) { + double *_d_C = 0; + clad::array _d_Xo(3UL), _d_Xcam(3UL); + double _t0; + double _t1; + double _t2; + const double *_t3; + double _t4; + double _t5; + const double *_t6; + double *_t7; + double _t8; + double _t9; + _d_C = &_d_cam[3]; + const double *const C = &cam[3]; + double Xo[3], Xcam[3]; + _t0 = Xo[0]; + Xo[0] = X[0] - C[0]; + _t1 = Xo[1]; + Xo[1] = X[1] - C[1]; + _t2 = Xo[2]; + Xo[2] = X[2] - C[2]; + _t3 = cam + 0; + rodrigues_rotate_point(cam + 0, Xo, Xcam); + _t4 = proj[0]; + proj[0] = Xcam[0] / Xcam[2]; + _t5 = proj[1]; + proj[1] = Xcam[1] / Xcam[2]; + _t6 = cam + 9; + _t7 = proj; + // CFIX-start + double tproj0 = proj[0]; + double tproj1 = proj[1]; + // CFIX-end + radial_distort(cam + 9, proj); + _t8 = proj[0]; + proj[0] = proj[0] * cam[6] + cam[7]; + _t9 = proj[1]; + proj[1] = proj[1] * cam[6] + cam[8]; + { + proj[1] = _t9; + double _r_d6 = _d_proj[1]; + _d_proj[1] += _r_d6 * cam[6]; + _d_cam[6] += proj[1] * _r_d6; + _d_cam[8] += _r_d6; + _d_proj[1] -= _r_d6; + _d_proj[1]; + } + { + proj[0] = _t8; + double _r_d5 = _d_proj[0]; + _d_proj[0] += _r_d5 * cam[6]; + _d_cam[6] += proj[0] * _r_d5; + _d_cam[7] += _r_d5; + _d_proj[0] -= _r_d5; + _d_proj[0]; + } + { + proj = _t7; + // CFIX-start + proj[0] = tproj0; + proj[1] = tproj1; + // CFIX-end + radial_distort_pullback(_t6, _t7, _d_cam.ptr_ref() + 9, _d_proj); + } + { + proj[1] = _t5; + double _r_d4 = _d_proj[1]; + _d_Xcam[1] += _r_d4 / Xcam[2]; + double _r1 = _r_d4 * -Xcam[1] / (Xcam[2] * Xcam[2]); + _d_Xcam[2] += _r1; + _d_proj[1] -= _r_d4; + _d_proj[1]; + } + { + proj[0] = _t4; + double _r_d3 = _d_proj[0]; + _d_Xcam[0] += _r_d3 / Xcam[2]; + double _r0 = _r_d3 * -Xcam[0] / (Xcam[2] * Xcam[2]); + _d_Xcam[2] += _r0; + _d_proj[0] -= _r_d3; + _d_proj[0]; + } + rodrigues_rotate_point_pullback(_t3, Xo, Xcam, _d_cam.ptr_ref() + 0, _d_Xo, + _d_Xcam); + { + Xo[2] = _t2; + double _r_d2 = _d_Xo[2]; + _d_X[2] += _r_d2; + _d_C[2] += -_r_d2; + _d_Xo[2] -= _r_d2; + _d_Xo[2]; + } + { + Xo[1] = _t1; + double _r_d1 = _d_Xo[1]; + _d_X[1] += _r_d1; + _d_C[1] += -_r_d1; + _d_Xo[1] -= _r_d1; + _d_Xo[1]; + } + { + Xo[0] = _t0; + double _r_d0 = _d_Xo[0]; + _d_X[0] += _r_d0; + _d_C[0] += -_r_d0; + _d_Xo[0] -= _r_d0; + _d_Xo[0]; + } +} + +void computeReprojError_pullback(const double *cam, const double *X, + const double *w, const double *feat, + double *err, clad::array_ref _d_cam, + clad::array_ref _d_X, + clad::array_ref _d_w, + clad::array_ref _d_feat, + clad::array_ref _d_err) { + clad::array _d_proj(2UL); + const double *_t0; + const double *_t1; + double _t2; + double _t3; + double proj[2]; + _t0 = cam; + _t1 = X; + project(cam, X, proj); + _t2 = err[0]; + err[0] = *w * (proj[0] - feat[0]); + _t3 = err[1]; + err[1] = *w * (proj[1] - feat[1]); + { + err[1] = _t3; + double _r_d1 = _d_err[1]; + *_d_w += _r_d1 * (proj[1] - feat[1]); + _d_proj[1] += *w * _r_d1; + _d_feat[1] += -*w * _r_d1; + _d_err[1] -= _r_d1; + _d_err[1]; + } + { + err[0] = _t2; + double _r_d0 = _d_err[0]; + *_d_w += _r_d0 * (proj[0] - feat[0]); + _d_proj[0] += *w * _r_d0; + _d_feat[0] += -*w * _r_d0; + _d_err[0] -= _r_d0; + _d_err[0]; + } + { + cam = _t0; + X = _t1; + project_pullback(_t0, _t1, proj, _d_cam, _d_X, _d_proj); + } +} +void computeReprojError_wrapper_grad( + const double *cam, const double *X, const double *w, const double *feat, + double *err, clad::array_ref _d_cam, clad::array_ref _d_X, + clad::array_ref _d_w, clad::array_ref _d_feat, + clad::array_ref _d_err) { + const double *_t0; + const double *_t1; + const double *_t2; + const double *_t3; + double *_t4; + _t0 = cam; + _t1 = X; + _t2 = w; + _t3 = feat; + _t4 = err; + computeReprojError(cam, X, w, feat, err); + goto _label0; +_label0: + _d_err[0] += 1; + { + cam = _t0; + X = _t1; + w = _t2; + feat = _t3; + err = _t4; + computeReprojError_pullback(_t0, _t1, _t2, _t3, _t4, _d_cam, _d_X, _d_w, + _d_feat, _d_err); + } +} +void computeZachWeightError_pullback(const double *const w, double *err, + clad::array_ref _d_w, + clad::array_ref _d_err) { + double _t0; + _t0 = *err; + *err = 1 - *w * (*w); + { + *err = _t0; + double _r_d0 = *_d_err; + *_d_w += -_r_d0 * (*w); + *_d_w += *w * -_r_d0; + *_d_err -= _r_d0; + *_d_err; + } +} + +void computeZachWeightError_wrapper_grad(const double *w, double *err, + clad::array_ref _d_w, + clad::array_ref _d_err) { + const double *_t0; + double *_t1; + _t0 = w; + _t1 = err; + computeZachWeightError(w, err); + goto _label0; +_label0: + _d_err[0] += 1; + { + w = _t0; + err = _t1; + computeZachWeightError_pullback(_t0, _t1, _d_w, _d_err); + } +} diff --git a/src/cpp/modules/cladLatest/ba/ba_grad.h b/src/cpp/modules/cladLatest/ba/ba_grad.h new file mode 100644 index 00000000..de42ade1 --- /dev/null +++ b/src/cpp/modules/cladLatest/ba/ba_grad.h @@ -0,0 +1,13 @@ +#include "clad/Differentiator/ArrayRef.h" + +void computeReprojError_pullback(const double *cam, const double *X, + const double *w, const double *feat, + double *err, clad::array_ref _d_cam, + clad::array_ref _d_X, + clad::array_ref _d_w, + clad::array_ref _d_feat, + clad::array_ref _d_err); + +void computeZachWeightError_pullback(const double *const w, double *err, + clad::array_ref _d_w, + clad::array_ref _d_err); \ No newline at end of file diff --git a/src/cpp/modules/cladLatest/gmm/gmm.h b/src/cpp/modules/cladLatest/gmm/gmm.h new file mode 100644 index 00000000..c7908de3 --- /dev/null +++ b/src/cpp/modules/cladLatest/gmm/gmm.h @@ -0,0 +1,299 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +#include +#include +using std::vector; +#include "defs.h" +#include "matrix.h" + +// CLADFIX-Add-Start +// namespace clad { +// namespace custom_derivatives { +// namespace class_functions { +// template +// void resize_pullback(::std::vector *v, size_t n, ::std::vector *d_v, size_t d_n) { + +// } +// } +// } +// } +// CLADFIX-Add-end + +//////////////////////////////////////////////////////////// +//////////////////// Declarations ////////////////////////// +//////////////////////////////////////////////////////////// + +// d: dim +// k: number of gaussians +// n: number of points +// alphas: k logs of mixture weights (unnormalized), so +// weights = exp(log_alphas) / sum(exp(log_alphas)) +// means: d*k component means +// icf: (d*(d+1)/2)*k inverse covariance factors +// every icf entry stores firstly log of diagonal and then +// columnwise other entris +// To generate icf in MATLAB given covariance C : +// L = inv(chol(C, 'lower')); +// inv_cov_factor = [log(diag(L)); L(au_tril_indices(d, -1))] +// wishart: wishart distribution parameters +// x: d*n points +// err: 1 output +template +void gmm_objective(int d, int k, int n, const T* const alphas, const T* const means, + const T* const icf, const double* const x, Wishart wishart, T* err); + +// split of the outer loop over points +template +void gmm_objective_split_inner(int d, int k, + const T* const alphas, + const T* const means, + const T* const icf, + const double* const x, + Wishart wishart, + T* err); +// other terms which are outside the loop +template +void gmm_objective_split_other(int d, int k, int n, + const T* const alphas, + const T* const means, + const T* const icf, + Wishart wishart, + T* err); + +template +T logsumexp(int n, const T* const x); + +// p: dim +// k: number of components +// wishart parameters +// sum_qs: k sums of log diags of Qs +// Qdiags: d*k +// icf: (p*(p+1)/2)*k inverse covariance factors +template +T log_wishart_prior(int p, int k, + Wishart wishart, + const T* const sum_qs, + const T* const Qdiags, + const T* const icf); + +template +void preprocess_qs(int d, int k, + const T* const icf, + T* sum_qs, + T* Qdiags); + +template +void Qtimesx(int d, + const T* const Qdiag, + const T* const ltri, // strictly lower triangular part + const T* const x, + T* out); + +//////////////////////////////////////////////////////////// +//////////////////// Definitions /////////////////////////// +//////////////////////////////////////////////////////////// + +template +T logsumexp(int n, const T* const x) +{ + T mx = arr_max(n, x); + T semx = 0.; + for (int i = 0; i < n; i++) + { + semx = semx + exp(x[i] - mx); + } + return log(semx) + mx; +} + +template +T log_wishart_prior(int p, int k, + Wishart wishart, + const T* const sum_qs, + const T* const Qdiags, + const T* const icf) +{ + int n = p + wishart.m + 1; + int icf_sz = p * (p + 1) / 2; + + double C = n * p * (log(wishart.gamma) - 0.5 * log(2)) - log_gamma_distrib(0.5 * n, p); + + T out = 0; + for (int ik = 0; ik < k; ik++) + { + T frobenius = sqnorm(p, &Qdiags[ik * p]) + sqnorm(icf_sz - p, &icf[ik * icf_sz + p]); + out = out + 0.5 * wishart.gamma * wishart.gamma * (frobenius) + -wishart.m * sum_qs[ik]; + } + + return out - k * C; +} + +template +void preprocess_qs(int d, int k, + const T* const icf, + T* sum_qs, + T* Qdiags) +{ + int icf_sz = d * (d + 1) / 2; + for (int ik = 0; ik < k; ik++) + { + sum_qs[ik] = 0.; + for (int id = 0; id < d; id++) + { + T q = icf[ik * icf_sz + id]; + sum_qs[ik] = sum_qs[ik] + q; + Qdiags[ik * d + id] = exp(q); + } + } +} + +template +void Qtimesx(int d, + const T* const Qdiag, + const T* const ltri, // strictly lower triangular part + const T* const x, + T* out) +{ + for (int id = 0; id < d; id++) + out[id] = Qdiag[id] * x[id]; + + int Lparamsidx = 0; + for (int i = 0; i < d; i++) + { + for (int j = i + 1; j < d; j++) + { + out[j] = out[j] + ltri[Lparamsidx] * x[i]; + Lparamsidx++; + } + } +} + +template +void gmm_objective(int d, int k, int n, + const T* const alphas, + const T* const means, + const T* const icf, + const double* const x, + Wishart wishart, + T* err) +{ + const double CONSTANT = -n * d * 0.5 * log(2 * PI); + int icf_sz = d * (d + 1) / 2; + + // CLADFIX-Modify-DeleteStart(VectorInit) + // vector Qdiags(d * k); + // vector sum_qs(k); + // vector xcentered(d); + // vector Qxcentered(d); + // vector main_term(k); + // CLADFIX-Modify-DeleteEnd(VectorInit) + // CLADFIX-Modify-AddStart(VectorInit) + T Qdiags[d * k]; + T sum_qs[k]; + T xcentered[d]; + T Qxcentered[d]; + T main_term[k]; + //CLADFIX-Modify-AddEnd(VectorInit) + + preprocess_qs(d, k, icf, &sum_qs[0], &Qdiags[0]); + + T slse = 0.; + for (int ix = 0; ix < n; ix++) + { + for (int ik = 0; ik < k; ik++) + { + subtract(d, &x[ix * d], &means[ik * d], &xcentered[0]); + Qtimesx(d, &Qdiags[ik * d], &icf[ik * icf_sz + d], &xcentered[0], &Qxcentered[0]); + + main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5 * sqnorm(d, &Qxcentered[0]); + } + slse = slse + logsumexp(k, &main_term[0]); + } + + T lse_alphas = logsumexp(k, alphas); + + *err = CONSTANT + slse - n * lse_alphas; + + *err = *err + log_wishart_prior(d, k, wishart, &sum_qs[0], &Qdiags[0], icf); + +} + +template +void gmm_objective_split_inner(int d, int k, + const T* const alphas, + const T* const means, + const T* const icf, + const double* const x, + Wishart wishart, + T* err) +{ + int icf_sz = d * (d + 1) / 2; + + T* Ldiag = new T[d]; + T* xcentered = new T[d]; + T* mahal = new T[d]; + T* lse = new T[k]; + + for (int ik = 0; ik < k; ik++) + { + int icf_off = ik * icf_sz; + T sumlog_Ldiag(0.); + for (int id = 0; id < d; id++) + { + sumlog_Ldiag = sumlog_Ldiag + icf[icf_off + id]; + Ldiag[id] = exp(icf[icf_off + id]); + } + + for (int id = 0; id < d; id++) + { + xcentered[id] = x[id] - means[ik * d + id]; + mahal[id] = Ldiag[id] * xcentered[id]; + } + int Lparamsidx = d; + for (int i = 0; i < d; i++) + { + for (int j = i + 1; j < d; j++) + { + mahal[j] = mahal[j] + icf[icf_off + Lparamsidx] * xcentered[i]; + Lparamsidx++; + } + } + T sqsum_mahal(0.); + for (int id = 0; id < d; id++) + { + sqsum_mahal = sqsum_mahal + mahal[id] * mahal[id]; + } + + lse[ik] = alphas[ik] + sumlog_Ldiag - 0.5 * sqsum_mahal; + } + + *err = logsumexp(k, lse); + + delete[] mahal; + delete[] xcentered; + delete[] Ldiag; + delete[] lse; +} + +template +void gmm_objective_split_other(int d, int k, int n, + const T* const alphas, + const T* const means, + const T* const icf, + Wishart wishart, + T* err) +{ + const double CONSTANT = -n * d * 0.5 * log(2 * PI); + + T lse_alphas = logsumexp(k, alphas); + + T* sum_qs = new T[k]; + T* Qdiags = new T[d * k]; + preprocess_qs(d, k, icf, sum_qs, Qdiags); + *err = CONSTANT - n * lse_alphas + log_wishart_prior(d, k, wishart, sum_qs, Qdiags, icf); + delete[] sum_qs; + delete[] Qdiags; +} \ No newline at end of file diff --git a/src/cpp/modules/cladLatest/gmm/gmm_grad.cpp b/src/cpp/modules/cladLatest/gmm/gmm_grad.cpp new file mode 100644 index 00000000..660263b8 --- /dev/null +++ b/src/cpp/modules/cladLatest/gmm/gmm_grad.cpp @@ -0,0 +1,553 @@ +#include "gmm.h" +// CLADFIX-Add-Start +#include "clad/Differentiator/Differentiator.h" +// CLADFIX-Add-End + +void gmm_objective_pullback(int d, int k, int n, const double *const alphas, const double *const means, const double *const icf, const double *const x, Wishart wishart, double *err, int *_d_d, int *_d_k, int *_d_n, double *_d_alphas, double *_d_means, double *_d_icf, double *_d_x, Wishart *_d_wishart, double *_d_err); +void gmm_objective_wrapper_grad(int d, int k, int n, const double *const alphas, const double *const means, const double *const icf, const double *const x, Wishart wishart, double *err, int *_d_d, int *_d_k, int *_d_n, double *_d_alphas, double *_d_means, double *_d_icf, double *_d_x, Wishart *_d_wishart, double *_d_err) { + gmm_objective(d, k, n, alphas, means, icf, x, wishart, err); + goto _label0; + _label0: + _d_err[0] += 1; + { + int _r0 = 0; + int _r1 = 0; + int _r2 = 0; + Wishart _r3 = {}; + gmm_objective_pullback(d, k, n, alphas, means, icf, x, wishart, err, &_r0, &_r1, &_r2, _d_alphas, _d_means, _d_icf, _d_x, &_r3, _d_err); + *_d_d += _r0; + *_d_k += _r1; + *_d_n += _r2; + } +} +void preprocess_qs_pullback(int d, int k, const double *const icf, double *sum_qs, double *Qdiags, int *_d_d, int *_d_k, double *_d_icf, double *_d_sum_qs, double *_d_Qdiags); +void subtract_pullback(int d, const double *const x, const double *const y, double *out, int *_d_d, double *_d_x, double *_d_y, double *_d_out); +void Qtimesx_pullback(int d, const double *const Qdiag, const double *const ltri, const double *const x, double *out, int *_d_d, double *_d_Qdiag, double *_d_ltri, double *_d_x, double *_d_out); +void sqnorm_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x); +void logsumexp_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x); +void log_wishart_prior_pullback(int p, int k, Wishart wishart, const double *const sum_qs, const double *const Qdiags, const double *const icf, double _d_y, int *_d_p, int *_d_k, Wishart *_d_wishart, double *_d_sum_qs, double *_d_Qdiags, double *_d_icf); +void gmm_objective_pullback(int d, int k, int n, const double *const alphas, const double *const means, const double *const icf, const double *const x, Wishart wishart, double *err, int *_d_d, int *_d_k, int *_d_n, double *_d_alphas, double *_d_means, double *_d_icf, double *_d_x, Wishart *_d_wishart, double *_d_err) { + double _t0; + double _d_CONSTANT = 0; + int _d_icf_sz = 0; + double _d_slse = 0; + unsigned long _t1; + int _d_ix = 0; + int ix = 0; + clad::tape _t2 = {}; + clad::tape _t3 = {}; + int _d_ik = 0; + int ik = 0; + clad::tape _t4 = {}; + clad::tape _t5 = {}; + clad::tape _t6 = {}; + double _d_lse_alphas = 0; + double _t7; + double _t8; + _t0 = log(2 * 3.1415926535900001); + const double CONSTANT = -n * d * 0.5 * _t0; + int icf_sz = d * (d + 1) / 2; + double _d_Qdiags[d * k]; + clad::zero_init(_d_Qdiags, d * k); + double Qdiags[d * k]; + double _d_sum_qs[k]; + clad::zero_init(_d_sum_qs, k); + double sum_qs[k]; + double _d_xcentered[d]; + clad::zero_init(_d_xcentered, d); + double xcentered[d]; + double _d_Qxcentered[d]; + clad::zero_init(_d_Qxcentered, d); + double Qxcentered[d]; + double _d_main_term[k]; + clad::zero_init(_d_main_term, k); + double main_term[k]; + preprocess_qs(d, k, icf, &sum_qs[0], &Qdiags[0]); + double slse = 0.; + _t1 = 0; + for (ix = 0; ix < n; ix++) { + _t1++; + clad::push(_t2, 0UL); + for (clad::push(_t3, ik) , ik = 0; ik < k; ik++) { + clad::back(_t2)++; + subtract(d, &x[ix * d], &means[ik * d], &xcentered[0]); + Qtimesx(d, &Qdiags[ik * d], &icf[ik * icf_sz + d], &xcentered[0], &Qxcentered[0]); + clad::push(_t4, main_term[ik]); + main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5 * clad::push(_t5, sqnorm(d, &Qxcentered[0])); + } + clad::push(_t6, slse); + slse = slse + logsumexp(k, &main_term[0]); + } + double lse_alphas = logsumexp(k, alphas); + _t7 = *err; + *err = CONSTANT + slse - n * lse_alphas; + _t8 = *err; + *err = *err + log_wishart_prior(d, k, wishart, &sum_qs[0], &Qdiags[0], icf); + { + *err = _t8; + double _r_d3 = *_d_err; + *_d_err -= _r_d3; + *_d_err += _r_d3; + int _r7 = 0; + int _r8 = 0; + Wishart _r9 = {}; + log_wishart_prior_pullback(d, k, wishart, &sum_qs[0], &Qdiags[0], icf, _r_d3, &_r7, &_r8, &_r9, &_d_sum_qs[0], &_d_Qdiags[0], _d_icf); + *_d_d += _r7; + *_d_k += _r8; + } + { + *err = _t7; + double _r_d2 = *_d_err; + *_d_err -= _r_d2; + _d_CONSTANT += _r_d2; + _d_slse += _r_d2; + *_d_n += -_r_d2 * lse_alphas; + _d_lse_alphas += n * -_r_d2; + } + { + int _r6 = 0; + logsumexp_pullback(k, alphas, _d_lse_alphas, &_r6, _d_alphas); + *_d_k += _r6; + } + for (; _t1; _t1--) { + ix--; + { + slse = clad::pop(_t6); + double _r_d1 = _d_slse; + _d_slse -= _r_d1; + _d_slse += _r_d1; + int _r5 = 0; + logsumexp_pullback(k, &main_term[0], _r_d1, &_r5, &_d_main_term[0]); + *_d_k += _r5; + } + { + for (; clad::back(_t2); clad::back(_t2)--) { + ik--; + { + main_term[ik] = clad::pop(_t4); + double _r_d0 = _d_main_term[ik]; + _d_main_term[ik] -= _r_d0; + _d_alphas[ik] += _r_d0; + _d_sum_qs[ik] += _r_d0; + int _r4 = 0; + sqnorm_pullback(d, &Qxcentered[0], 0.5 * -_r_d0, &_r4, &_d_Qxcentered[0]); + *_d_d += _r4; + } + { + int _r3 = 0; + Qtimesx_pullback(d, &Qdiags[ik * d], &icf[ik * icf_sz + d], &xcentered[0], &Qxcentered[0], &_r3, &_d_Qdiags[ik * d], &_d_icf[ik * icf_sz + d], &_d_xcentered[0], &_d_Qxcentered[0]); + *_d_d += _r3; + } + { + int _r2 = 0; + subtract_pullback(d, &x[ix * d], &means[ik * d], &xcentered[0], &_r2, &_d_x[ix * d], &_d_means[ik * d], &_d_xcentered[0]); + *_d_d += _r2; + } + } + { + _d_ik = 0; + ik = clad::pop(_t3); + } + clad::pop(_t2); + } + } + { + int _r0 = 0; + int _r1 = 0; + preprocess_qs_pullback(d, k, icf, &sum_qs[0], &Qdiags[0], &_r0, &_r1, _d_icf, &_d_sum_qs[0], &_d_Qdiags[0]); + *_d_d += _r0; + *_d_k += _r1; + } + { + *_d_d += _d_icf_sz / 2 * (d + 1); + *_d_d += d * _d_icf_sz / 2; + } + { + *_d_n += -_d_CONSTANT * _t0 * 0.5 * d; + *_d_d += -n * _d_CONSTANT * _t0 * 0.5; + } +} +void preprocess_qs_pullback(int d, int k, const double *const icf, double *sum_qs, double *Qdiags, int *_d_d, int *_d_k, double *_d_icf, double *_d_sum_qs, double *_d_Qdiags) { + int _d_icf_sz = 0; + unsigned long _t0; + int _d_ik = 0; + int ik = 0; + clad::tape _t1 = {}; + clad::tape _t2 = {}; + clad::tape _t3 = {}; + int _d_id = 0; + int id = 0; + clad::tape _t4 = {}; + double _d_q = 0; + double q = 0; + clad::tape _t5 = {}; + clad::tape _t6 = {}; + int icf_sz = d * (d + 1) / 2; + _t0 = 0; + for (ik = 0; ik < k; ik++) { + _t0++; + clad::push(_t1, sum_qs[ik]); + sum_qs[ik] = 0.; + clad::push(_t2, 0UL); + for (clad::push(_t3, id) , id = 0; id < d; id++) { + clad::back(_t2)++; + clad::push(_t4, q) , q = icf[ik * icf_sz + id]; + clad::push(_t5, sum_qs[ik]); + sum_qs[ik] = sum_qs[ik] + q; + clad::push(_t6, Qdiags[ik * d + id]); + Qdiags[ik * d + id] = exp(q); + } + } + for (; _t0; _t0--) { + ik--; + { + for (; clad::back(_t2); clad::back(_t2)--) { + id--; + { + Qdiags[ik * d + id] = clad::pop(_t6); + double _r_d2 = _d_Qdiags[ik * d + id]; + _d_Qdiags[ik * d + id] -= _r_d2; + double _r0 = 0; + _r0 += _r_d2 * clad::custom_derivatives::exp_pushforward(q, 1.).pushforward; + _d_q += _r0; + } + { + sum_qs[ik] = clad::pop(_t5); + double _r_d1 = _d_sum_qs[ik]; + _d_sum_qs[ik] -= _r_d1; + _d_sum_qs[ik] += _r_d1; + _d_q += _r_d1; + } + { + _d_icf[ik * icf_sz + id] += _d_q; + _d_q = 0; + q = clad::pop(_t4); + } + } + { + _d_id = 0; + id = clad::pop(_t3); + } + clad::pop(_t2); + } + { + sum_qs[ik] = clad::pop(_t1); + double _r_d0 = _d_sum_qs[ik]; + _d_sum_qs[ik] -= _r_d0; + } + } + { + *_d_d += _d_icf_sz / 2 * (d + 1); + *_d_d += d * _d_icf_sz / 2; + } +} +void subtract_pullback(int d, const double *const x, const double *const y, double *out, int *_d_d, double *_d_x, double *_d_y, double *_d_out) { + unsigned long _t0; + int _d_id = 0; + int id = 0; + clad::tape _t1 = {}; + _t0 = 0; + for (id = 0; id < d; id++) { + _t0++; + clad::push(_t1, out[id]); + out[id] = x[id] - y[id]; + } + for (; _t0; _t0--) { + id--; + { + out[id] = clad::pop(_t1); + double _r_d0 = _d_out[id]; + _d_out[id] -= _r_d0; + _d_x[id] += _r_d0; + _d_y[id] += -_r_d0; + } + } +} +void Qtimesx_pullback(int d, const double *const Qdiag, const double *const ltri, const double *const x, double *out, int *_d_d, double *_d_Qdiag, double *_d_ltri, double *_d_x, double *_d_out) { + unsigned long _t0; + int _d_id = 0; + int id = 0; + clad::tape _t1 = {}; + int _d_Lparamsidx = 0; + unsigned long _t2; + int _d_i = 0; + int i = 0; + clad::tape _t3 = {}; + clad::tape _t4 = {}; + int _d_j = 0; + int j = 0; + clad::tape _t5 = {}; + _t0 = 0; + for (id = 0; id < d; id++) { + _t0++; + clad::push(_t1, out[id]); + out[id] = Qdiag[id] * x[id]; + } + int Lparamsidx = 0; + _t2 = 0; + for (i = 0; i < d; i++) { + _t2++; + clad::push(_t3, 0UL); + for (clad::push(_t4, j) , j = i + 1; j < d; j++) { + clad::back(_t3)++; + clad::push(_t5, out[j]); + out[j] = out[j] + ltri[Lparamsidx] * x[i]; + Lparamsidx++; + } + } + for (; _t2; _t2--) { + i--; + { + for (; clad::back(_t3); clad::back(_t3)--) { + j--; + Lparamsidx--; + { + out[j] = clad::pop(_t5); + double _r_d1 = _d_out[j]; + _d_out[j] -= _r_d1; + _d_out[j] += _r_d1; + _d_ltri[Lparamsidx] += _r_d1 * x[i]; + _d_x[i] += ltri[Lparamsidx] * _r_d1; + } + } + { + _d_i += _d_j; + _d_j = 0; + j = clad::pop(_t4); + } + clad::pop(_t3); + } + } + for (; _t0; _t0--) { + id--; + out[id] = clad::pop(_t1); + double _r_d0 = _d_out[id]; + _d_out[id] -= _r_d0; + _d_Qdiag[id] += _r_d0 * x[id]; + _d_x[id] += Qdiag[id] * _r_d0; + } +} +void sqnorm_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x) { + double _d_res = 0; + unsigned long _t0; + int _d_i = 0; + int i = 0; + clad::tape _t1 = {}; + double res = x[0] * x[0]; + _t0 = 0; + for (i = 1; i < n; i++) { + _t0++; + clad::push(_t1, res); + res = res + x[i] * x[i]; + } + goto _label0; + _label0: + _d_res += _d_y; + for (; _t0; _t0--) { + i--; + res = clad::pop(_t1); + double _r_d0 = _d_res; + _d_res -= _r_d0; + _d_res += _r_d0; + _d_x[i] += _r_d0 * x[i]; + _d_x[i] += x[i] * _r_d0; + } + { + _d_x[0] += _d_res * x[0]; + _d_x[0] += x[0] * _d_res; + } +} +void arr_max_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x); +void logsumexp_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x) { + double _d_mx = 0; + double _d_semx = 0; + unsigned long _t0; + int _d_i = 0; + int i = 0; + clad::tape _t1 = {}; + double mx = arr_max(n, x); + double semx = 0.; + _t0 = 0; + for (i = 0; i < n; i++) { + _t0++; + clad::push(_t1, semx); + semx = semx + exp(x[i] - mx); + } + goto _label0; + _label0: + { + double _r2 = 0; + _r2 += _d_y * clad::custom_derivatives::log_pushforward(semx, 1.).pushforward; + _d_semx += _r2; + _d_mx += _d_y; + } + for (; _t0; _t0--) { + i--; + { + semx = clad::pop(_t1); + double _r_d0 = _d_semx; + _d_semx -= _r_d0; + _d_semx += _r_d0; + double _r1 = 0; + _r1 += _r_d0 * clad::custom_derivatives::exp_pushforward(x[i] - mx, 1.).pushforward; + _d_x[i] += _r1; + _d_mx += -_r1; + } + } + { + int _r0 = 0; + arr_max_pullback(n, x, _d_mx, &_r0, _d_x); + *_d_n += _r0; + } +} +inline void log_gamma_distrib_pullback(double a, double p, double _d_y, double *_d_a, double *_d_p); +void log_wishart_prior_pullback(int p, int k, Wishart wishart, const double *const sum_qs, const double *const Qdiags, const double *const icf, double _d_y, int *_d_p, int *_d_k, Wishart *_d_wishart, double *_d_sum_qs, double *_d_Qdiags, double *_d_icf) { + int _d_n = 0; + int _d_icf_sz = 0; + double _t0; + double _t1; + double _d_C = 0; + double _d_out = 0; + unsigned long _t2; + int _d_ik = 0; + int ik = 0; + clad::tape _t3 = {}; + double _d_frobenius = 0; + double frobenius = 0; + clad::tape _t4 = {}; + int n = p + wishart.m + 1; + int icf_sz = p * (p + 1) / 2; + _t1 = log(2); + _t0 = (log(wishart.gamma) - 0.5 * _t1); + double C = n * p * _t0 - log_gamma_distrib(0.5 * n, p); + double out = 0; + _t2 = 0; + for (ik = 0; ik < k; ik++) { + _t2++; + clad::push(_t3, frobenius) , frobenius = sqnorm(p, &Qdiags[ik * p]) + sqnorm(icf_sz - p, &icf[ik * icf_sz + p]); + clad::push(_t4, out); + out = out + 0.5 * wishart.gamma * wishart.gamma * (frobenius) - wishart.m * sum_qs[ik]; + } + goto _label0; + _label0: + { + _d_out += _d_y; + *_d_k += -_d_y * C; + _d_C += k * -_d_y; + } + for (; _t2; _t2--) { + ik--; + { + out = clad::pop(_t4); + double _r_d0 = _d_out; + _d_out -= _r_d0; + _d_out += _r_d0; + (*_d_wishart).gamma += 0.5 * _r_d0 * (frobenius) * wishart.gamma; + (*_d_wishart).gamma += 0.5 * wishart.gamma * _r_d0 * (frobenius); + _d_frobenius += 0.5 * wishart.gamma * wishart.gamma * _r_d0; + (*_d_wishart).m += -_r_d0 * sum_qs[ik]; + _d_sum_qs[ik] += wishart.m * -_r_d0; + } + { + int _r3 = 0; + sqnorm_pullback(p, &Qdiags[ik * p], _d_frobenius, &_r3, &_d_Qdiags[ik * p]); + *_d_p += _r3; + int _r4 = 0; + sqnorm_pullback(icf_sz - p, &icf[ik * icf_sz + p], _d_frobenius, &_r4, &_d_icf[ik * icf_sz + p]); + _d_icf_sz += _r4; + *_d_p += -_r4; + _d_frobenius = 0; + frobenius = clad::pop(_t3); + } + } + { + _d_n += _d_C * _t0 * p; + *_d_p += n * _d_C * _t0; + double _r0 = 0; + _r0 += n * p * _d_C * clad::custom_derivatives::log_pushforward(wishart.gamma, 1.).pushforward; + (*_d_wishart).gamma += _r0; + double _r1 = 0; + double _r2 = 0; + log_gamma_distrib_pullback(0.5 * n, p, -_d_C, &_r1, &_r2); + _d_n += 0.5 * _r1; + *_d_p += _r2; + } + { + *_d_p += _d_icf_sz / 2 * (p + 1); + *_d_p += p * _d_icf_sz / 2; + } + { + *_d_p += _d_n; + (*_d_wishart).m += _d_n; + } +} +void arr_max_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x) { + double _d_m = 0; + unsigned long _t0; + int _d_i = 0; + int i = 0; + clad::tape _t2 = {}; + clad::tape _t3 = {}; + double m = x[0]; + _t0 = 0; + for (i = 1; i < n; i++) { + _t0++; + bool _t1 = m < x[i]; + { + if (_t1) { + clad::push(_t3, m); + m = x[i]; + } + clad::push(_t2, _t1); + } + } + goto _label0; + _label0: + _d_m += _d_y; + for (; _t0; _t0--) { + i--; + if (clad::pop(_t2)) { + m = clad::pop(_t3); + double _r_d0 = _d_m; + _d_m -= _r_d0; + _d_x[i] += _r_d0; + } + } + _d_x[0] += _d_m; +} +inline void log_gamma_distrib_pullback(double a, double p, double _d_y, double *_d_a, double *_d_p) { + double _t0; + double _d_out = 0; + unsigned long _t1; + int _d_j = 0; + int j = 0; + clad::tape _t2 = {}; + _t0 = log(3.1415926535900001); + double out = 0.25 * p * (p - 1) * _t0; + _t1 = 0; + for (j = 1; j <= p; j++) { + _t1++; + clad::push(_t2, out); + out = out + lgamma(a + 0.5 * (1 - j)); + } + goto _label0; + _label0: + _d_out += _d_y; + for (; _t1; _t1--) { + j--; + { + out = clad::pop(_t2); + double _r_d0 = _d_out; + _d_out -= _r_d0; + _d_out += _r_d0; + double _r0 = 0; + _r0 += _r_d0 * numerical_diff::forward_central_difference(lgamma, a + 0.5 * (1 - j), 0, 0, a + 0.5 * (1 - j)); + *_d_a += _r0; + _d_j += -0.5 * _r0; + } + } + { + *_d_p += 0.25 * _d_out * _t0 * (p - 1); + *_d_p += 0.25 * p * _d_out * _t0; + } +} \ No newline at end of file diff --git a/src/cpp/modules/cladLatest/gmm/gmm_grad.h b/src/cpp/modules/cladLatest/gmm/gmm_grad.h new file mode 100644 index 00000000..c39f2aa1 --- /dev/null +++ b/src/cpp/modules/cladLatest/gmm/gmm_grad.h @@ -0,0 +1,3 @@ +#include "gmm.h" + +void gmm_objective_pullback(int d, int k, int n, const double *const alphas, const double *const means, const double *const icf, const double *const x, Wishart wishart, double *err, int *_d_d, int *_d_k, int *_d_n, double *_d_alphas, double *_d_means, double *_d_icf, double *_d_x, Wishart *_d_wishart, double *_d_err); \ No newline at end of file diff --git a/src/cpp/modules/cladLatestTBR/CMakeLists.txt b/src/cpp/modules/cladLatestTBR/CMakeLists.txt new file mode 100644 index 00000000..7575ef22 --- /dev/null +++ b/src/cpp/modules/cladLatestTBR/CMakeLists.txt @@ -0,0 +1,15 @@ +project("ADBench_CladLatestTBR" CXX) + +add_library("CladLatestTBR" MODULE) + +SET(clad_install_root "/home/mad-scientist/clubs/open-source/clad/formal/builds/build-llvm-16/inst") + +target_sources("CladLatestTBR" PRIVATE "${CMAKE_SOURCE_DIR}/src/cpp/shared/utils.cpp") +target_sources("CladLatestTBR" PRIVATE "ba/ba_grad.cpp") +target_sources("CladLatestTBR" PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/CladLatestTBRBA.cpp") +target_sources("CladLatestTBR" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/gmm/gmm_grad_tbr.cpp") +target_sources("CladLatestTBR" PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/CladLatestTBRGMM.cpp") + +target_include_directories("CladLatestTBR" PRIVATE ${clad_install_root}/include) +target_include_directories("CladLatestTBR" PRIVATE ${CMAKE_SOURCE_DIR}/src/cpp/shared) +# add_compile_definitions("CladLatestTBR" CLAD_NO_NUM_DIFF) \ No newline at end of file diff --git a/src/cpp/modules/cladLatestTBR/CladLatestTBRBA.cpp b/src/cpp/modules/cladLatestTBR/CladLatestTBRBA.cpp new file mode 100644 index 00000000..5a89e5ee --- /dev/null +++ b/src/cpp/modules/cladLatestTBR/CladLatestTBRBA.cpp @@ -0,0 +1,115 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#include "CladLatestTBRBA.h" +#include "ba/ba_grad.h" + +// This function must be called before any other function. +void CladLatestTBRBA::prepare(BAInput &&input) { + this->input = input; + result = {std::vector(2 * this->input.p), + std::vector(this->input.p), + BASparseMat(this->input.n, this->input.m, this->input.p)}; + + reproj_err_d = std::vector(2 * (BA_NCAMPARAMS + 3 + 1)); + reproj_err_d_row = std::vector(BA_NCAMPARAMS + 3 + 1); +} + +BAOutput CladLatestTBRBA::output() { return result; } + +void CladLatestTBRBA::calculate_objective(int times) { + for (int i = 0; i < times; i++) { + ba_objective(input.n, input.m, input.p, input.cams.data(), input.X.data(), + input.w.data(), input.obs.data(), input.feats.data(), + result.reproj_err.data(), result.w_err.data()); + } +} + +void CladLatestTBRBA::calculate_jacobian(int times) { + for (int i = 0; i < times; i++) { + result.J.clear(); + calculate_reproj_error_jacobian_part(); + calculate_weight_error_jacobian_part(); + } +} + +void CladLatestTBRBA::calculate_reproj_error_jacobian_part() { + double + errb[2]; // stores dY + // (i-th element equals to 1.0 for calculating i-th jacobian row) + + double err[2]; // stores fictive result + // (Tapenade doesn't calculate an original function in + // reverse mode) + + clad::array_ref cam_gradient_part(reproj_err_d_row.data(), + reproj_err_d_row.size()); + clad::array_ref x_gradient_part( + reproj_err_d_row.data() + BA_NCAMPARAMS, + reproj_err_d_row.size() - BA_NCAMPARAMS); + clad::array_ref weight_gradient_part( + reproj_err_d_row.data() + BA_NCAMPARAMS + 3, + reproj_err_d_row.size() - BA_NCAMPARAMS - 3); + std::vector feats_gradient_part(2, 0); + clad::array_ref feats_gradient_part_ref(feats_gradient_part.data(), + feats_gradient_part.size()); + clad::array_ref errb_ref(errb, 2); + + for (int i = 0; i < input.p; i++) { + std::fill(reproj_err_d_row.begin(), reproj_err_d_row.end(), 0); + std::fill(feats_gradient_part.begin(), feats_gradient_part.end(), 0); + + int camIdx = input.obs[2 * i + 0]; + int ptIdx = input.obs[2 * i + 1]; + + // calculate first row + errb[0] = 1.0; + errb[1] = 0.0; + computeReprojError_pullback( + &input.cams[camIdx * BA_NCAMPARAMS], &input.X[ptIdx * 3], &input.w[i], + &input.feats[i * 2], err, cam_gradient_part, x_gradient_part, + weight_gradient_part, feats_gradient_part_ref, errb_ref); + + // fill first row elements + for (int j = 0; j < BA_NCAMPARAMS + 3 + 1; j++) { + reproj_err_d[2 * j] = reproj_err_d_row[j]; + } + + std::fill(reproj_err_d_row.begin(), reproj_err_d_row.end(), 0); + std::fill(feats_gradient_part.begin(), feats_gradient_part.end(), 0); + // calculate second row + errb[0] = 0.0; + errb[1] = 1.0; + computeReprojError_pullback( + &input.cams[camIdx * BA_NCAMPARAMS], &input.X[ptIdx * 3], &input.w[i], + &input.feats[i * 2], err, cam_gradient_part, x_gradient_part, + weight_gradient_part, feats_gradient_part_ref, errb); + + // fill second row elements + for (int j = 0; j < BA_NCAMPARAMS + 3 + 1; j++) { + reproj_err_d[2 * j + 1] = reproj_err_d_row[j]; + } + + result.J.insert_reproj_err_block(i, camIdx, ptIdx, reproj_err_d.data()); + } +} + +void CladLatestTBRBA::calculate_weight_error_jacobian_part() { + for (int j = 0; j < input.p; j++) { + double wb = 0; // stores calculated derivative + + double err = 0.0; // stores fictive result + // (Tapenade doesn't calculate an original function in + // reverse mode) + + double errb = 1.0; // stores dY + // (equals to 1.0 for derivative calculation) + + computeZachWeightError_pullback(&input.w[j], &err, &wb, &errb); + result.J.insert_w_err_block(j, wb); + } +} + +extern "C" DLL_PUBLIC ITest *get_ba_test() { + return new CladLatestTBRBA(); +} \ No newline at end of file diff --git a/src/cpp/modules/cladLatestTBR/CladLatestTBRBA.h b/src/cpp/modules/cladLatestTBR/CladLatestTBRBA.h new file mode 100644 index 00000000..b0edce27 --- /dev/null +++ b/src/cpp/modules/cladLatestTBR/CladLatestTBRBA.h @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +#include "../../shared/ITest.h" +#include "../../shared/BAData.h" +#include "../../shared/defs.h" + +#include "ba/ba.h" + +#include + +class CladLatestTBRBA : public ITest +{ +private: + BAInput input; + BAOutput result; + std::vector state; + + // buffer for reprojection error jacobian part holding (column-major) + std::vector reproj_err_d; + + // buffer for reprojection error jacobian block row holding + std::vector reproj_err_d_row; +public: + // This function must be called before any other function. + virtual void prepare(BAInput&& input) override; + + virtual void calculate_objective(int times) override; + virtual void calculate_jacobian(int times) override; + virtual BAOutput output() override; + + ~CladLatestTBRBA() {} + +private: + void calculate_weight_error_jacobian_part(); + void calculate_reproj_error_jacobian_part(); +}; \ No newline at end of file diff --git a/src/cpp/modules/cladLatestTBR/CladLatestTBRGMM.cpp b/src/cpp/modules/cladLatestTBR/CladLatestTBRGMM.cpp new file mode 100644 index 00000000..c00c7273 --- /dev/null +++ b/src/cpp/modules/cladLatestTBR/CladLatestTBRGMM.cpp @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#include "CladLatestTBRGMM.h" + +#include "gmm/gmm.h" +#include "gmm/gmm_grad_tbr.h" + +// This function must be called before any other function. +void CladLatestTBRGMM::prepare(GMMInput&& input) +{ + this->input = input; + int Jcols = (this->input.k * (this->input.d + 1) * (this->input.d + 2)) / 2; + result = { 0, std::vector(Jcols) }; +} + + + +GMMOutput CladLatestTBRGMM::output() +{ + return result; +} + + + +void CladLatestTBRGMM::calculate_objective(int times) +{ + for (int i = 0; i < times; i++) + { + gmm_objective( + input.d, + input.k, + input.n, + input.alphas.data(), + input.means.data(), + input.icf.data(), + input.x.data(), + input.wishart, + &result.objective + ); + } +} + + + +void CladLatestTBRGMM::calculate_jacobian(int times) +{ + double* alphas_gradient_part = result.gradient.data(); + double* means_gradient_part = result.gradient.data() + input.alphas.size(); + double* icf_gradient_part = + result.gradient.data() + + input.alphas.size() + + input.means.size(); + std::vector d_x(input.x.size(), 0); + + for (int i = 0; i < times; i++) + { + double tmp = 0.0; // stores fictive result + // (Tapenade doesn't calculate an original function in reverse mode) + + double errb = 1.0; // stores dY + // (equals to 1.0 for gradient calculation) + + int d_d = 0, d_k = 0, d_n = 0; + Wishart d_wishart{0, 0}; + std::fill(result.gradient.begin(), result.gradient.end(), 0); + std::fill(d_x.begin(), d_x.end(), 0); + + gmm_objective_pullback( + input.d, + input.k, + input.n, + input.alphas.data(), + input.means.data(), + input.icf.data(), + input.x.data(), + input.wishart, + &tmp, + &d_d, + &d_k, + &d_n, + alphas_gradient_part, + means_gradient_part, + icf_gradient_part, + d_x.data(), + &d_wishart, + &errb + ); + } +} + + + +extern "C" DLL_PUBLIC ITest* get_gmm_test() +{ + return new CladLatestTBRGMM(); +} \ No newline at end of file diff --git a/src/cpp/modules/cladLatestTBR/CladLatestTBRGMM.h b/src/cpp/modules/cladLatestTBR/CladLatestTBRGMM.h new file mode 100644 index 00000000..390b585a --- /dev/null +++ b/src/cpp/modules/cladLatestTBR/CladLatestTBRGMM.h @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +#include "../../shared/ITest.h" +#include "../../shared/GMMData.h" + +#include + +class CladLatestTBRGMM : public ITest +{ +private: + GMMInput input; + GMMOutput result; + std::vector state; + +public: + // This function must be called before any other function. + virtual void prepare(GMMInput&& input) override; + + virtual void calculate_objective(int times) override; + virtual void calculate_jacobian(int times) override; + virtual GMMOutput output() override; + + ~CladLatestTBRGMM() {} +}; + diff --git a/src/cpp/modules/cladLatestTBR/ba/ba.h b/src/cpp/modules/cladLatestTBR/ba/ba.h new file mode 100644 index 00000000..937a686f --- /dev/null +++ b/src/cpp/modules/cladLatestTBR/ba/ba.h @@ -0,0 +1,192 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. +#ifndef CLADLATEST_BA_H +#define CLADLATEST_BA_H +#include +#include +#include + +#include "defs.h" +#include "matrix.h" + +//////////////////////////////////////////////////////////// +//////////////////// Declarations ////////////////////////// +//////////////////////////////////////////////////////////// + +// cam: 11 camera in format [r1 r2 r3 C1 C2 C3 f u0 v0 k1 k2] +// r1, r2, r3 are angle - axis rotation parameters(Rodrigues) +// [C1 C2 C3]' is the camera center +// f is the focal length in pixels +// [u0 v0]' is the principal point +// k1, k2 are radial distortion parameters +// X: 3 point +// feats: 2 feature (x,y coordinates) +// reproj_err: 2 +// projection function: +// Xcam = R * (X - C) +// distorted = radial_distort(projective2euclidean(Xcam), radial_parameters) +// proj = distorted * f + principal_point +// err = sqsum(proj - measurement) +template +void computeReprojError(const T *const cam, const T *const X, const T *const w, + const double *const feat, T *err); + +// w: 1 +// w_err: 1 +template void computeZachWeightError(const T *const w, T *err); + +// n number of cameras +// m number of points +// p number of observations +// cams: 11*n cameras in format [r1 r2 r3 C1 C2 C3 f u0 v0 k1 k2] +// r1, r2, r3 are angle - axis rotation parameters(Rodrigues) +// [C1 C2 C3]' is the camera center +// f is the focal length in pixels +// [u0 v0]' is the principal point +// k1, k2 are radial distortion parameters +// X: 3*m points +// obs: 2*p observations (pairs cameraIdx, pointIdx) +// feats: 2*p features (x,y coordinates corresponding to observations) +// reproj_err: 2*p errors of observations +// w_err: p weight "error" terms +// projection function: +// Xcam = R * (X - C) +// distorted = radial_distort(projective2euclidean(Xcam), radial_parameters) +// proj = distorted * f + principal_point +// err = sqsum(proj - measurement) +template +void ba_objective(int n, int m, int p, const T *const cams, const T *const X, + const T *const w, const int *const obs, + const double *const feats, T *reproj_err, T *w_err); + +// rot: 3 rotation parameters +// pt: 3 point to be rotated +// rotatedPt: 3 rotated point +// this is an efficient evaluation (part of +// the Ceres implementation) +// easy to understand calculation in matlab: +// theta = sqrt(sum(w. ^ 2)); +// n = w / theta; +// n_x = au_cross_matrix(n); +// R = eye(3) + n_x*sin(theta) + n_x*n_x*(1 - cos(theta)); +template +void rodrigues_rotate_point(const T *const rot, const T *const pt, + T *rotatedPt); + +//////////////////////////////////////////////////////////// +//////////////////// Definitions /////////////////////////// +//////////////////////////////////////////////////////////// + +template T sqsum(int n, const T *const x) { + T res = 0; + int i = 0; + for (; i < n; i++) + res = res + x[i] * x[i]; + return res; +} + +template +void rodrigues_rotate_point(const T * rot, const T * pt, + T *rotatedPt) { + T sqtheta = sqsum(3, rot); + int i0 = 0; + int i1 = 0; + int i2 = 0; + T theta, costheta, sintheta, theta_inverse, tmp; + T w[3], w_cross_pt[3]; + T rot_cross_pt[3]; + if (sqtheta != 0) { + theta = sqrt(sqtheta); + costheta = cos(theta); + sintheta = sin(theta); + theta_inverse = 1.0 / theta; + + for (; i0 < 3; i0++) + w[i0] = rot[i0] * theta_inverse; + + cross(w, pt, w_cross_pt); + + tmp = (w[0] * pt[0] + w[1] * pt[1] + w[2] * pt[2]) * (1. - costheta); + + for (; i1 < 3; i1++) + rotatedPt[i1] = pt[i1] * costheta + w_cross_pt[i1] * sintheta + w[i1] * tmp; + } else { + cross(rot, pt, rot_cross_pt); + + for (; i2 < 3; i2++) + rotatedPt[i2] = pt[i2] + rot_cross_pt[i2]; + } +} + +template void radial_distort(const T *const rad_params, T *proj) { + T rsq, L; + rsq = sqsum(2, proj); + L = 1. + rad_params[0] * rsq + rad_params[1] * rsq * rsq; + proj[0] = proj[0] * L; + proj[1] = proj[1] * L; +} + +template +void project(const T * cam, const T * X, T *proj) { + const T *const C = &cam[3]; + T Xo[3], Xcam[3]; + + Xo[0] = X[0] - C[0]; + Xo[1] = X[1] - C[1]; + Xo[2] = X[2] - C[2]; + + rodrigues_rotate_point(cam + 0, Xo, Xcam); + + proj[0] = Xcam[0] / Xcam[2]; + proj[1] = Xcam[1] / Xcam[2]; + + radial_distort(cam + 9, proj); + + proj[0] = proj[0] * cam[6] + cam[7]; + proj[1] = proj[1] * cam[6] + cam[8]; +} + +template +void computeReprojError(const T * cam, const T * X, const T * w, + const double * feat, T *err) { + T proj[2]; + project(cam, X, proj); + + err[0] = (*w) * (proj[0] - feat[0]); + err[1] = (*w) * (proj[1] - feat[1]); +} + +template void computeZachWeightError(const T *const w, T *err) { + *err = 1 - (*w) * (*w); +} + +template +void ba_objective(int n, int m, int p, const T *const cams, const T *const X, + const T *const w, const int *const obs, + const double *const feats, T *reproj_err, T *w_err) { + for (int i = 0; i < p; i++) { + int camIdx = obs[i * 2 + 0]; + int ptIdx = obs[i * 2 + 1]; + computeReprojError(&cams[camIdx * BA_NCAMPARAMS], &X[ptIdx * 3], &w[i], + &feats[i * 2], &reproj_err[2 * i]); + } + + for (int i = 0; i < p; i++) { + computeZachWeightError(&w[i], &w_err[i]); + } +} + +template +T computeReprojError_wrapper(const T * cam, const T * X, + const T * w, const double * feat, + T *err) { + computeReprojError(cam, X, w, feat, err); + return err[0]; +} + +template +T computeZachWeightError_wrapper(const T * w, T *err) { + computeZachWeightError(w, err); + return err[0]; +} +#endif \ No newline at end of file diff --git a/src/cpp/modules/cladLatestTBR/ba/ba_grad.cpp b/src/cpp/modules/cladLatestTBR/ba/ba_grad.cpp new file mode 100644 index 00000000..6a0c58e0 --- /dev/null +++ b/src/cpp/modules/cladLatestTBR/ba/ba_grad.cpp @@ -0,0 +1,452 @@ +#include "ba.h" +#include "clad/Differentiator/Differentiator.h" + +void sqsum_pullback(int n, const double *const x, double _d_y, + clad::array_ref _d_n, clad::array_ref _d_x) { + double _d_res = 0; + int _d_i = 0; + unsigned long _t0; + double res = 0; + int i = 0; + _t0 = 0; + for (; i < n; i++) { + _t0++; + res = res + x[i] * x[i]; + } + goto _label0; +_label0: + _d_res += _d_y; + for (; _t0; _t0--) { + i--; + double _r_d0 = _d_res; + _d_res += _r_d0; + _d_x[i] += _r_d0 * x[i]; + _d_x[i] += x[i] * _r_d0; + _d_res -= _r_d0; + } +} +void cross_pullback(const double *const a, const double *const b, double *out, + clad::array_ref _d_a, clad::array_ref _d_b, + clad::array_ref _d_out) { + out[0] = a[1] * b[2] - a[2] * b[1]; + out[1] = a[2] * b[0] - a[0] * b[2]; + out[2] = a[0] * b[1] - a[1] * b[0]; + { + double _r_d2 = _d_out[2]; + _d_a[0] += _r_d2 * b[1]; + _d_b[1] += a[0] * _r_d2; + _d_a[1] += -_r_d2 * b[0]; + _d_b[0] += a[1] * -_r_d2; + _d_out[2] -= _r_d2; + _d_out[2]; + } + { + double _r_d1 = _d_out[1]; + _d_a[2] += _r_d1 * b[0]; + _d_b[0] += a[2] * _r_d1; + _d_a[0] += -_r_d1 * b[2]; + _d_b[2] += a[0] * -_r_d1; + _d_out[1] -= _r_d1; + _d_out[1]; + } + { + double _r_d0 = _d_out[0]; + _d_a[1] += _r_d0 * b[2]; + _d_b[2] += a[1] * _r_d0; + _d_a[2] += -_r_d0 * b[1]; + _d_b[1] += a[2] * -_r_d0; + _d_out[0] -= _r_d0; + _d_out[0]; + } +} +void rodrigues_rotate_point_pullback(const double *rot, const double *pt, + double *rotatedPt, + clad::array_ref _d_rot, + clad::array_ref _d_pt, + clad::array_ref _d_rotatedPt) { + const double *_t0; + double _d_sqtheta = 0; + int _d_i0 = 0; + int _d_i1 = 0; + int _d_i2 = 0; + double _d_theta = 0, _d_costheta = 0, _d_sintheta = 0, _d_theta_inverse = 0, + _d_tmp = 0; + clad::array _d_w(3UL), _d_w_cross_pt(3UL); + clad::array _d_rot_cross_pt(3UL); + bool _cond0; + unsigned long _t1; + const double *_t2; + unsigned long _t3; + const double *_t4; + const double *_t5; + unsigned long _t6; + _t0 = rot; + double sqtheta = sqsum(3, rot); + int i0 = 0; + int i1 = 0; + int i2 = 0; + double theta, costheta, sintheta, theta_inverse, tmp; + double w[3], w_cross_pt[3]; + double rot_cross_pt[3]; + _cond0 = sqtheta != 0; + if (_cond0) { + theta = sqrt(sqtheta); + costheta = cos(theta); + sintheta = sin(theta); + theta_inverse = 1. / theta; + _t1 = 0; + for (; i0 < 3; i0++) { + _t1++; + w[i0] = rot[i0] * theta_inverse; + } + _t2 = pt; + cross(w, pt, w_cross_pt); + tmp = (w[0] * pt[0] + w[1] * pt[1] + w[2] * pt[2]) * (1. - costheta); + _t3 = 0; + for (; i1 < 3; i1++) { + _t3++; + rotatedPt[i1] = + pt[i1] * costheta + w_cross_pt[i1] * sintheta + w[i1] * tmp; + } + } else { + _t4 = rot; + _t5 = pt; + cross(rot, pt, rot_cross_pt); + _t6 = 0; + for (; i2 < 3; i2++) { + _t6++; + rotatedPt[i2] = pt[i2] + rot_cross_pt[i2]; + } + } + if (_cond0) { + for (; _t3; _t3--) { + i1--; + double _r_d6 = _d_rotatedPt[i1]; + _d_pt[i1] += _r_d6 * costheta; + _d_costheta += pt[i1] * _r_d6; + _d_w_cross_pt[i1] += _r_d6 * sintheta; + _d_sintheta += w_cross_pt[i1] * _r_d6; + _d_w[i1] += _r_d6 * tmp; + _d_tmp += w[i1] * _r_d6; + _d_rotatedPt[i1] -= _r_d6; + _d_rotatedPt[i1]; + } + { + double _r_d5 = _d_tmp; + _d_w[0] += _r_d5 * (1. - costheta) * pt[0]; + _d_pt[0] += w[0] * _r_d5 * (1. - costheta); + _d_w[1] += _r_d5 * (1. - costheta) * pt[1]; + _d_pt[1] += w[1] * _r_d5 * (1. - costheta); + _d_w[2] += _r_d5 * (1. - costheta) * pt[2]; + _d_pt[2] += w[2] * _r_d5 * (1. - costheta); + _d_costheta += -(w[0] * pt[0] + w[1] * pt[1] + w[2] * pt[2]) * _r_d5; + _d_tmp -= _r_d5; + } + { + pt = _t2; + cross_pullback(w, _t2, w_cross_pt, _d_w, _d_pt, _d_w_cross_pt); + } + for (; _t1; _t1--) { + i0--; + double _r_d4 = _d_w[i0]; + _d_rot[i0] += _r_d4 * theta_inverse; + _d_theta_inverse += rot[i0] * _r_d4; + _d_w[i0] -= _r_d4; + _d_w[i0]; + } + { + double _r_d3 = _d_theta_inverse; + double _r4 = _r_d3 * -1. / (theta * theta); + _d_theta += _r4; + _d_theta_inverse -= _r_d3; + } + { + double _r_d2 = _d_sintheta; + double _r3 = + _r_d2 * + clad::custom_derivatives::sin_pushforward(theta, 1.).pushforward; + _d_theta += _r3; + _d_sintheta -= _r_d2; + } + { + double _r_d1 = _d_costheta; + double _r2 = + _r_d1 * + clad::custom_derivatives::cos_pushforward(theta, 1.).pushforward; + _d_theta += _r2; + _d_costheta -= _r_d1; + } + { + double _r_d0 = _d_theta; + double _r1 = + _r_d0 * + clad::custom_derivatives::sqrt_pushforward(sqtheta, 1.).pushforward; + _d_sqtheta += _r1; + _d_theta -= _r_d0; + } + } else { + for (; _t6; _t6--) { + i2--; + double _r_d7 = _d_rotatedPt[i2]; + _d_pt[i2] += _r_d7; + _d_rot_cross_pt[i2] += _r_d7; + _d_rotatedPt[i2] -= _r_d7; + _d_rotatedPt[i2]; + } + { + rot = _t4; + pt = _t5; + cross_pullback(_t4, _t5, rot_cross_pt, _d_rot, _d_pt, _d_rot_cross_pt); + } + } + { + rot = _t0; + int _grad0 = 0; + sqsum_pullback(3, _t0, _d_sqtheta, &_grad0, _d_rot); + int _r0 = _grad0; + } +} +void radial_distort_pullback(const double *const rad_params, double *proj, + clad::array_ref _d_rad_params, + clad::array_ref _d_proj) { + double _d_rsq = 0, _d_L = 0; + double *_t0; + double _t1; + double _t2; + double rsq, L; + _t0 = proj; + rsq = sqsum(2, proj); + L = 1. + rad_params[0] * rsq + rad_params[1] * rsq * rsq; + _t1 = proj[0]; + proj[0] = proj[0] * L; + _t2 = proj[1]; + proj[1] = proj[1] * L; + { + proj[1] = _t2; + double _r_d3 = _d_proj[1]; + _d_proj[1] += _r_d3 * L; + _d_L += proj[1] * _r_d3; + _d_proj[1] -= _r_d3; + _d_proj[1]; + } + { + proj[0] = _t1; + double _r_d2 = _d_proj[0]; + _d_proj[0] += _r_d2 * L; + _d_L += proj[0] * _r_d2; + _d_proj[0] -= _r_d2; + _d_proj[0]; + } + { + double _r_d1 = _d_L; + _d_rad_params[0] += _r_d1 * rsq; + _d_rsq += rad_params[0] * _r_d1; + _d_rad_params[1] += _r_d1 * rsq * rsq; + _d_rsq += rad_params[1] * _r_d1 * rsq; + _d_rsq += rad_params[1] * rsq * _r_d1; + _d_L -= _r_d1; + } + { + double _r_d0 = _d_rsq; + proj = _t0; + int _grad0 = 0; + sqsum_pullback(2, _t0, _r_d0, &_grad0, _d_proj); + int _r0 = _grad0; + _d_rsq -= _r_d0; + } +} +void project_pullback(const double *cam, const double *X, double *proj, + clad::array_ref _d_cam, + clad::array_ref _d_X, + clad::array_ref _d_proj) { + double *_d_C = 0; + clad::array _d_Xo(3UL), _d_Xcam(3UL); + const double *_t0; + const double *_t1; + double *_t2; + double _t3; + double _t4; + _d_C = &_d_cam[3]; + const double *const C = &cam[3]; + double Xo[3], Xcam[3]; + Xo[0] = X[0] - C[0]; + Xo[1] = X[1] - C[1]; + Xo[2] = X[2] - C[2]; + _t0 = cam + 0; + rodrigues_rotate_point(cam + 0, Xo, Xcam); + proj[0] = Xcam[0] / Xcam[2]; + proj[1] = Xcam[1] / Xcam[2]; + _t1 = cam + 9; + _t2 = proj; + // CFIX-start + double tproj0 = proj[0]; + double tproj1 = proj[1]; + // CFIX-end + radial_distort(cam + 9, proj); + _t3 = proj[0]; + proj[0] = proj[0] * cam[6] + cam[7]; + _t4 = proj[1]; + proj[1] = proj[1] * cam[6] + cam[8]; + { + proj[1] = _t4; + double _r_d6 = _d_proj[1]; + _d_proj[1] += _r_d6 * cam[6]; + _d_cam[6] += proj[1] * _r_d6; + _d_cam[8] += _r_d6; + _d_proj[1] -= _r_d6; + _d_proj[1]; + } + { + proj[0] = _t3; + double _r_d5 = _d_proj[0]; + _d_proj[0] += _r_d5 * cam[6]; + _d_cam[6] += proj[0] * _r_d5; + _d_cam[7] += _r_d5; + _d_proj[0] -= _r_d5; + _d_proj[0]; + } + { + proj = _t2; + proj[0] = tproj0; + proj[1] = tproj1; + radial_distort_pullback(_t1, _t2, _d_cam.ptr_ref() + 9, _d_proj); + } + { + double _r_d4 = _d_proj[1]; + _d_Xcam[1] += _r_d4 / Xcam[2]; + double _r1 = _r_d4 * -Xcam[1] / (Xcam[2] * Xcam[2]); + _d_Xcam[2] += _r1; + _d_proj[1] -= _r_d4; + _d_proj[1]; + } + { + double _r_d3 = _d_proj[0]; + _d_Xcam[0] += _r_d3 / Xcam[2]; + double _r0 = _r_d3 * -Xcam[0] / (Xcam[2] * Xcam[2]); + _d_Xcam[2] += _r0; + _d_proj[0] -= _r_d3; + _d_proj[0]; + } + rodrigues_rotate_point_pullback(_t0, Xo, Xcam, _d_cam.ptr_ref() + 0, _d_Xo, + _d_Xcam); + { + double _r_d2 = _d_Xo[2]; + _d_X[2] += _r_d2; + _d_C[2] += -_r_d2; + _d_Xo[2] -= _r_d2; + _d_Xo[2]; + } + { + double _r_d1 = _d_Xo[1]; + _d_X[1] += _r_d1; + _d_C[1] += -_r_d1; + _d_Xo[1] -= _r_d1; + _d_Xo[1]; + } + { + double _r_d0 = _d_Xo[0]; + _d_X[0] += _r_d0; + _d_C[0] += -_r_d0; + _d_Xo[0] -= _r_d0; + _d_Xo[0]; + } +} +void computeReprojError_pullback(const double *cam, const double *X, + const double *w, const double *feat, + double *err, clad::array_ref _d_cam, + clad::array_ref _d_X, + clad::array_ref _d_w, + clad::array_ref _d_feat, + clad::array_ref _d_err) { + clad::array _d_proj(2UL); + const double *_t0; + const double *_t1; + double proj[2]; + _t0 = cam; + _t1 = X; + project(cam, X, proj); + err[0] = *w * (proj[0] - feat[0]); + err[1] = *w * (proj[1] - feat[1]); + { + double _r_d1 = _d_err[1]; + *_d_w += _r_d1 * (proj[1] - feat[1]); + _d_proj[1] += *w * _r_d1; + _d_feat[1] += -*w * _r_d1; + _d_err[1] -= _r_d1; + _d_err[1]; + } + { + double _r_d0 = _d_err[0]; + *_d_w += _r_d0 * (proj[0] - feat[0]); + _d_proj[0] += *w * _r_d0; + _d_feat[0] += -*w * _r_d0; + _d_err[0] -= _r_d0; + _d_err[0]; + } + { + cam = _t0; + X = _t1; + project_pullback(_t0, _t1, proj, _d_cam, _d_X, _d_proj); + } +} +void computeReprojError_wrapper_grad( + const double *cam, const double *X, const double *w, const double *feat, + double *err, clad::array_ref _d_cam, clad::array_ref _d_X, + clad::array_ref _d_w, clad::array_ref _d_feat, + clad::array_ref _d_err) { + const double *_t0; + const double *_t1; + const double *_t2; + const double *_t3; + double *_t4; + _t0 = cam; + _t1 = X; + _t2 = w; + _t3 = feat; + _t4 = err; + computeReprojError(cam, X, w, feat, err); + goto _label0; +_label0: + _d_err[0] += 1; + { + cam = _t0; + X = _t1; + w = _t2; + feat = _t3; + err = _t4; + computeReprojError_pullback(_t0, _t1, _t2, _t3, _t4, _d_cam, _d_X, _d_w, + _d_feat, _d_err); + } +} +void computeZachWeightError_pullback(const double *const w, double *err, + clad::array_ref _d_w, + clad::array_ref _d_err) { + double _t0; + _t0 = *err; + *err = 1 - *w * (*w); + { + *err = _t0; + double _r_d0 = *_d_err; + *_d_w += -_r_d0 * (*w); + *_d_w += *w * -_r_d0; + *_d_err -= _r_d0; + *_d_err; + } +} +void computeZachWeightError_wrapper_grad(const double *w, double *err, + clad::array_ref _d_w, + clad::array_ref _d_err) { + const double *_t0; + double *_t1; + _t0 = w; + _t1 = err; + computeZachWeightError(w, err); + goto _label0; +_label0: + _d_err[0] += 1; + { + w = _t0; + err = _t1; + computeZachWeightError_pullback(_t0, _t1, _d_w, _d_err); + } +} \ No newline at end of file diff --git a/src/cpp/modules/cladLatestTBR/ba/ba_grad.h b/src/cpp/modules/cladLatestTBR/ba/ba_grad.h new file mode 100644 index 00000000..de42ade1 --- /dev/null +++ b/src/cpp/modules/cladLatestTBR/ba/ba_grad.h @@ -0,0 +1,13 @@ +#include "clad/Differentiator/ArrayRef.h" + +void computeReprojError_pullback(const double *cam, const double *X, + const double *w, const double *feat, + double *err, clad::array_ref _d_cam, + clad::array_ref _d_X, + clad::array_ref _d_w, + clad::array_ref _d_feat, + clad::array_ref _d_err); + +void computeZachWeightError_pullback(const double *const w, double *err, + clad::array_ref _d_w, + clad::array_ref _d_err); \ No newline at end of file diff --git a/src/cpp/modules/cladLatestTBR/gmm/gmm.h b/src/cpp/modules/cladLatestTBR/gmm/gmm.h new file mode 100644 index 00000000..c7908de3 --- /dev/null +++ b/src/cpp/modules/cladLatestTBR/gmm/gmm.h @@ -0,0 +1,299 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +#include +#include +using std::vector; +#include "defs.h" +#include "matrix.h" + +// CLADFIX-Add-Start +// namespace clad { +// namespace custom_derivatives { +// namespace class_functions { +// template +// void resize_pullback(::std::vector *v, size_t n, ::std::vector *d_v, size_t d_n) { + +// } +// } +// } +// } +// CLADFIX-Add-end + +//////////////////////////////////////////////////////////// +//////////////////// Declarations ////////////////////////// +//////////////////////////////////////////////////////////// + +// d: dim +// k: number of gaussians +// n: number of points +// alphas: k logs of mixture weights (unnormalized), so +// weights = exp(log_alphas) / sum(exp(log_alphas)) +// means: d*k component means +// icf: (d*(d+1)/2)*k inverse covariance factors +// every icf entry stores firstly log of diagonal and then +// columnwise other entris +// To generate icf in MATLAB given covariance C : +// L = inv(chol(C, 'lower')); +// inv_cov_factor = [log(diag(L)); L(au_tril_indices(d, -1))] +// wishart: wishart distribution parameters +// x: d*n points +// err: 1 output +template +void gmm_objective(int d, int k, int n, const T* const alphas, const T* const means, + const T* const icf, const double* const x, Wishart wishart, T* err); + +// split of the outer loop over points +template +void gmm_objective_split_inner(int d, int k, + const T* const alphas, + const T* const means, + const T* const icf, + const double* const x, + Wishart wishart, + T* err); +// other terms which are outside the loop +template +void gmm_objective_split_other(int d, int k, int n, + const T* const alphas, + const T* const means, + const T* const icf, + Wishart wishart, + T* err); + +template +T logsumexp(int n, const T* const x); + +// p: dim +// k: number of components +// wishart parameters +// sum_qs: k sums of log diags of Qs +// Qdiags: d*k +// icf: (p*(p+1)/2)*k inverse covariance factors +template +T log_wishart_prior(int p, int k, + Wishart wishart, + const T* const sum_qs, + const T* const Qdiags, + const T* const icf); + +template +void preprocess_qs(int d, int k, + const T* const icf, + T* sum_qs, + T* Qdiags); + +template +void Qtimesx(int d, + const T* const Qdiag, + const T* const ltri, // strictly lower triangular part + const T* const x, + T* out); + +//////////////////////////////////////////////////////////// +//////////////////// Definitions /////////////////////////// +//////////////////////////////////////////////////////////// + +template +T logsumexp(int n, const T* const x) +{ + T mx = arr_max(n, x); + T semx = 0.; + for (int i = 0; i < n; i++) + { + semx = semx + exp(x[i] - mx); + } + return log(semx) + mx; +} + +template +T log_wishart_prior(int p, int k, + Wishart wishart, + const T* const sum_qs, + const T* const Qdiags, + const T* const icf) +{ + int n = p + wishart.m + 1; + int icf_sz = p * (p + 1) / 2; + + double C = n * p * (log(wishart.gamma) - 0.5 * log(2)) - log_gamma_distrib(0.5 * n, p); + + T out = 0; + for (int ik = 0; ik < k; ik++) + { + T frobenius = sqnorm(p, &Qdiags[ik * p]) + sqnorm(icf_sz - p, &icf[ik * icf_sz + p]); + out = out + 0.5 * wishart.gamma * wishart.gamma * (frobenius) + -wishart.m * sum_qs[ik]; + } + + return out - k * C; +} + +template +void preprocess_qs(int d, int k, + const T* const icf, + T* sum_qs, + T* Qdiags) +{ + int icf_sz = d * (d + 1) / 2; + for (int ik = 0; ik < k; ik++) + { + sum_qs[ik] = 0.; + for (int id = 0; id < d; id++) + { + T q = icf[ik * icf_sz + id]; + sum_qs[ik] = sum_qs[ik] + q; + Qdiags[ik * d + id] = exp(q); + } + } +} + +template +void Qtimesx(int d, + const T* const Qdiag, + const T* const ltri, // strictly lower triangular part + const T* const x, + T* out) +{ + for (int id = 0; id < d; id++) + out[id] = Qdiag[id] * x[id]; + + int Lparamsidx = 0; + for (int i = 0; i < d; i++) + { + for (int j = i + 1; j < d; j++) + { + out[j] = out[j] + ltri[Lparamsidx] * x[i]; + Lparamsidx++; + } + } +} + +template +void gmm_objective(int d, int k, int n, + const T* const alphas, + const T* const means, + const T* const icf, + const double* const x, + Wishart wishart, + T* err) +{ + const double CONSTANT = -n * d * 0.5 * log(2 * PI); + int icf_sz = d * (d + 1) / 2; + + // CLADFIX-Modify-DeleteStart(VectorInit) + // vector Qdiags(d * k); + // vector sum_qs(k); + // vector xcentered(d); + // vector Qxcentered(d); + // vector main_term(k); + // CLADFIX-Modify-DeleteEnd(VectorInit) + // CLADFIX-Modify-AddStart(VectorInit) + T Qdiags[d * k]; + T sum_qs[k]; + T xcentered[d]; + T Qxcentered[d]; + T main_term[k]; + //CLADFIX-Modify-AddEnd(VectorInit) + + preprocess_qs(d, k, icf, &sum_qs[0], &Qdiags[0]); + + T slse = 0.; + for (int ix = 0; ix < n; ix++) + { + for (int ik = 0; ik < k; ik++) + { + subtract(d, &x[ix * d], &means[ik * d], &xcentered[0]); + Qtimesx(d, &Qdiags[ik * d], &icf[ik * icf_sz + d], &xcentered[0], &Qxcentered[0]); + + main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5 * sqnorm(d, &Qxcentered[0]); + } + slse = slse + logsumexp(k, &main_term[0]); + } + + T lse_alphas = logsumexp(k, alphas); + + *err = CONSTANT + slse - n * lse_alphas; + + *err = *err + log_wishart_prior(d, k, wishart, &sum_qs[0], &Qdiags[0], icf); + +} + +template +void gmm_objective_split_inner(int d, int k, + const T* const alphas, + const T* const means, + const T* const icf, + const double* const x, + Wishart wishart, + T* err) +{ + int icf_sz = d * (d + 1) / 2; + + T* Ldiag = new T[d]; + T* xcentered = new T[d]; + T* mahal = new T[d]; + T* lse = new T[k]; + + for (int ik = 0; ik < k; ik++) + { + int icf_off = ik * icf_sz; + T sumlog_Ldiag(0.); + for (int id = 0; id < d; id++) + { + sumlog_Ldiag = sumlog_Ldiag + icf[icf_off + id]; + Ldiag[id] = exp(icf[icf_off + id]); + } + + for (int id = 0; id < d; id++) + { + xcentered[id] = x[id] - means[ik * d + id]; + mahal[id] = Ldiag[id] * xcentered[id]; + } + int Lparamsidx = d; + for (int i = 0; i < d; i++) + { + for (int j = i + 1; j < d; j++) + { + mahal[j] = mahal[j] + icf[icf_off + Lparamsidx] * xcentered[i]; + Lparamsidx++; + } + } + T sqsum_mahal(0.); + for (int id = 0; id < d; id++) + { + sqsum_mahal = sqsum_mahal + mahal[id] * mahal[id]; + } + + lse[ik] = alphas[ik] + sumlog_Ldiag - 0.5 * sqsum_mahal; + } + + *err = logsumexp(k, lse); + + delete[] mahal; + delete[] xcentered; + delete[] Ldiag; + delete[] lse; +} + +template +void gmm_objective_split_other(int d, int k, int n, + const T* const alphas, + const T* const means, + const T* const icf, + Wishart wishart, + T* err) +{ + const double CONSTANT = -n * d * 0.5 * log(2 * PI); + + T lse_alphas = logsumexp(k, alphas); + + T* sum_qs = new T[k]; + T* Qdiags = new T[d * k]; + preprocess_qs(d, k, icf, sum_qs, Qdiags); + *err = CONSTANT - n * lse_alphas + log_wishart_prior(d, k, wishart, sum_qs, Qdiags, icf); + delete[] sum_qs; + delete[] Qdiags; +} \ No newline at end of file diff --git a/src/cpp/modules/cladLatestTBR/gmm/gmm_grad_tbr.cpp b/src/cpp/modules/cladLatestTBR/gmm/gmm_grad_tbr.cpp new file mode 100644 index 00000000..b7067741 --- /dev/null +++ b/src/cpp/modules/cladLatestTBR/gmm/gmm_grad_tbr.cpp @@ -0,0 +1,516 @@ +#include "gmm.h" +// CLADFIX-Add-Start +#include "clad/Differentiator/Differentiator.h" +// CLADFIX-Add-End + +void gmm_objective_pullback(int d, int k, int n, const double *const alphas, const double *const means, const double *const icf, const double *const x, Wishart wishart, double *err, int *_d_d, int *_d_k, int *_d_n, double *_d_alphas, double *_d_means, double *_d_icf, double *_d_x, Wishart *_d_wishart, double *_d_err); +void gmm_objective_wrapper_grad(int d, int k, int n, const double *const alphas, const double *const means, const double *const icf, const double *const x, Wishart wishart, double *err, int *_d_d, int *_d_k, int *_d_n, double *_d_alphas, double *_d_means, double *_d_icf, double *_d_x, Wishart *_d_wishart, double *_d_err) { + gmm_objective(d, k, n, alphas, means, icf, x, wishart, err); + goto _label0; + _label0: + _d_err[0] += 1; + { + int _r0 = 0; + int _r1 = 0; + int _r2 = 0; + Wishart _r3 = {}; + gmm_objective_pullback(d, k, n, alphas, means, icf, x, wishart, err, &_r0, &_r1, &_r2, _d_alphas, _d_means, _d_icf, _d_x, &_r3, _d_err); + *_d_d += _r0; + *_d_k += _r1; + *_d_n += _r2; + } +} +void preprocess_qs_pullback(int d, int k, const double *const icf, double *sum_qs, double *Qdiags, int *_d_d, int *_d_k, double *_d_icf, double *_d_sum_qs, double *_d_Qdiags); +void subtract_pullback(int d, const double *const x, const double *const y, double *out, int *_d_d, double *_d_x, double *_d_y, double *_d_out); +void Qtimesx_pullback(int d, const double *const Qdiag, const double *const ltri, const double *const x, double *out, int *_d_d, double *_d_Qdiag, double *_d_ltri, double *_d_x, double *_d_out); +void sqnorm_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x); +void logsumexp_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x); +void log_wishart_prior_pullback(int p, int k, Wishart wishart, const double *const sum_qs, const double *const Qdiags, const double *const icf, double _d_y, int *_d_p, int *_d_k, Wishart *_d_wishart, double *_d_sum_qs, double *_d_Qdiags, double *_d_icf); +void gmm_objective_pullback(int d, int k, int n, const double *const alphas, const double *const means, const double *const icf, const double *const x, Wishart wishart, double *err, int *_d_d, int *_d_k, int *_d_n, double *_d_alphas, double *_d_means, double *_d_icf, double *_d_x, Wishart *_d_wishart, double *_d_err) { + double _t0; + double _d_CONSTANT = 0; + int _d_icf_sz = 0; + double _d_slse = 0; + unsigned long _t1; + int _d_ix = 0; + int ix = 0; + clad::tape _t2 = {}; + clad::tape _t3 = {}; + int _d_ik = 0; + int ik = 0; + clad::tape _t4 = {}; + clad::tape _t5 = {}; + double _d_lse_alphas = 0; + double _t6; + double _t7; + _t0 = log(2 * 3.1415926535900001); + const double CONSTANT = -n * d * 0.5 * _t0; + int icf_sz = d * (d + 1) / 2; + double _d_Qdiags[d * k]; + clad::zero_init(_d_Qdiags, d * k); + double Qdiags[d * k]; + double _d_sum_qs[k]; + clad::zero_init(_d_sum_qs, k); + double sum_qs[k]; + double _d_xcentered[d]; + clad::zero_init(_d_xcentered, d); + double xcentered[d]; + double _d_Qxcentered[d]; + clad::zero_init(_d_Qxcentered, d); + double Qxcentered[d]; + double _d_main_term[k]; + clad::zero_init(_d_main_term, k); + double main_term[k]; + preprocess_qs(d, k, icf, &sum_qs[0], &Qdiags[0]); + double slse = 0.; + _t1 = 0; + for (ix = 0; ix < n; ix++) { + _t1++; + clad::push(_t2, 0UL); + for (clad::push(_t3, ik) , ik = 0; ik < k; ik++) { + clad::back(_t2)++; + subtract(d, &x[ix * d], &means[ik * d], &xcentered[0]); + Qtimesx(d, &Qdiags[ik * d], &icf[ik * icf_sz + d], &xcentered[0], &Qxcentered[0]); + clad::push(_t4, main_term[ik]); + main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5 * clad::push(_t5, sqnorm(d, &Qxcentered[0])); + } + slse = slse + logsumexp(k, &main_term[0]); + } + double lse_alphas = logsumexp(k, alphas); + _t6 = *err; + *err = CONSTANT + slse - n * lse_alphas; + _t7 = *err; + *err = *err + log_wishart_prior(d, k, wishart, &sum_qs[0], &Qdiags[0], icf); + { + *err = _t7; + double _r_d3 = *_d_err; + *_d_err -= _r_d3; + *_d_err += _r_d3; + int _r7 = 0; + int _r8 = 0; + Wishart _r9 = {}; + log_wishart_prior_pullback(d, k, wishart, &sum_qs[0], &Qdiags[0], icf, _r_d3, &_r7, &_r8, &_r9, &_d_sum_qs[0], &_d_Qdiags[0], _d_icf); + *_d_d += _r7; + *_d_k += _r8; + } + { + *err = _t6; + double _r_d2 = *_d_err; + *_d_err -= _r_d2; + _d_CONSTANT += _r_d2; + _d_slse += _r_d2; + *_d_n += -_r_d2 * lse_alphas; + _d_lse_alphas += n * -_r_d2; + } + { + int _r6 = 0; + logsumexp_pullback(k, alphas, _d_lse_alphas, &_r6, _d_alphas); + *_d_k += _r6; + } + for (; _t1; _t1--) { + ix--; + { + double _r_d1 = _d_slse; + _d_slse -= _r_d1; + _d_slse += _r_d1; + int _r5 = 0; + logsumexp_pullback(k, &main_term[0], _r_d1, &_r5, &_d_main_term[0]); + *_d_k += _r5; + } + { + for (; clad::back(_t2); clad::back(_t2)--) { + ik--; + { + main_term[ik] = clad::pop(_t4); + double _r_d0 = _d_main_term[ik]; + _d_main_term[ik] -= _r_d0; + _d_alphas[ik] += _r_d0; + _d_sum_qs[ik] += _r_d0; + int _r4 = 0; + sqnorm_pullback(d, &Qxcentered[0], 0.5 * -_r_d0, &_r4, &_d_Qxcentered[0]); + *_d_d += _r4; + } + { + int _r3 = 0; + Qtimesx_pullback(d, &Qdiags[ik * d], &icf[ik * icf_sz + d], &xcentered[0], &Qxcentered[0], &_r3, &_d_Qdiags[ik * d], &_d_icf[ik * icf_sz + d], &_d_xcentered[0], &_d_Qxcentered[0]); + *_d_d += _r3; + } + { + int _r2 = 0; + subtract_pullback(d, &x[ix * d], &means[ik * d], &xcentered[0], &_r2, &_d_x[ix * d], &_d_means[ik * d], &_d_xcentered[0]); + *_d_d += _r2; + } + } + { + _d_ik = 0; + ik = clad::pop(_t3); + } + clad::pop(_t2); + } + } + { + int _r0 = 0; + int _r1 = 0; + preprocess_qs_pullback(d, k, icf, &sum_qs[0], &Qdiags[0], &_r0, &_r1, _d_icf, &_d_sum_qs[0], &_d_Qdiags[0]); + *_d_d += _r0; + *_d_k += _r1; + } + { + *_d_d += _d_icf_sz / 2 * (d + 1); + *_d_d += d * _d_icf_sz / 2; + } + { + *_d_n += -_d_CONSTANT * _t0 * 0.5 * d; + *_d_d += -n * _d_CONSTANT * _t0 * 0.5; + } +} +void preprocess_qs_pullback(int d, int k, const double *const icf, double *sum_qs, double *Qdiags, int *_d_d, int *_d_k, double *_d_icf, double *_d_sum_qs, double *_d_Qdiags) { + int _d_icf_sz = 0; + unsigned long _t0; + int _d_ik = 0; + int ik = 0; + clad::tape _t1 = {}; + clad::tape _t2 = {}; + int _d_id = 0; + int id = 0; + clad::tape _t3 = {}; + double _d_q = 0; + double q = 0; + int icf_sz = d * (d + 1) / 2; + _t0 = 0; + for (ik = 0; ik < k; ik++) { + _t0++; + sum_qs[ik] = 0.; + clad::push(_t1, 0UL); + for (clad::push(_t2, id) , id = 0; id < d; id++) { + clad::back(_t1)++; + clad::push(_t3, q) , q = icf[ik * icf_sz + id]; + sum_qs[ik] = sum_qs[ik] + q; + Qdiags[ik * d + id] = exp(q); + } + } + for (; _t0; _t0--) { + ik--; + { + for (; clad::back(_t1); clad::back(_t1)--) { + id--; + { + double _r_d2 = _d_Qdiags[ik * d + id]; + _d_Qdiags[ik * d + id] -= _r_d2; + double _r0 = 0; + _r0 += _r_d2 * clad::custom_derivatives::exp_pushforward(q, 1.).pushforward; + _d_q += _r0; + } + { + double _r_d1 = _d_sum_qs[ik]; + _d_sum_qs[ik] -= _r_d1; + _d_sum_qs[ik] += _r_d1; + _d_q += _r_d1; + } + { + _d_icf[ik * icf_sz + id] += _d_q; + _d_q = 0; + q = clad::pop(_t3); + } + } + { + _d_id = 0; + id = clad::pop(_t2); + } + clad::pop(_t1); + } + { + double _r_d0 = _d_sum_qs[ik]; + _d_sum_qs[ik] -= _r_d0; + } + } + { + *_d_d += _d_icf_sz / 2 * (d + 1); + *_d_d += d * _d_icf_sz / 2; + } +} +void subtract_pullback(int d, const double *const x, const double *const y, double *out, int *_d_d, double *_d_x, double *_d_y, double *_d_out) { + unsigned long _t0; + int _d_id = 0; + int id = 0; + _t0 = 0; + for (id = 0; id < d; id++) { + _t0++; + out[id] = x[id] - y[id]; + } + for (; _t0; _t0--) { + id--; + { + double _r_d0 = _d_out[id]; + _d_out[id] -= _r_d0; + _d_x[id] += _r_d0; + _d_y[id] += -_r_d0; + } + } +} +void Qtimesx_pullback(int d, const double *const Qdiag, const double *const ltri, const double *const x, double *out, int *_d_d, double *_d_Qdiag, double *_d_ltri, double *_d_x, double *_d_out) { + unsigned long _t0; + int _d_id = 0; + int id = 0; + int _d_Lparamsidx = 0; + unsigned long _t1; + int _d_i = 0; + int i = 0; + clad::tape _t2 = {}; + clad::tape _t3 = {}; + int _d_j = 0; + int j = 0; + _t0 = 0; + for (id = 0; id < d; id++) { + _t0++; + out[id] = Qdiag[id] * x[id]; + } + int Lparamsidx = 0; + _t1 = 0; + for (i = 0; i < d; i++) { + _t1++; + clad::push(_t2, 0UL); + for (clad::push(_t3, j) , j = i + 1; j < d; j++) { + clad::back(_t2)++; + out[j] = out[j] + ltri[Lparamsidx] * x[i]; + Lparamsidx++; + } + } + for (; _t1; _t1--) { + i--; + { + for (; clad::back(_t2); clad::back(_t2)--) { + j--; + Lparamsidx--; + { + double _r_d1 = _d_out[j]; + _d_out[j] -= _r_d1; + _d_out[j] += _r_d1; + _d_ltri[Lparamsidx] += _r_d1 * x[i]; + _d_x[i] += ltri[Lparamsidx] * _r_d1; + } + } + { + _d_i += _d_j; + _d_j = 0; + j = clad::pop(_t3); + } + clad::pop(_t2); + } + } + for (; _t0; _t0--) { + id--; + double _r_d0 = _d_out[id]; + _d_out[id] -= _r_d0; + _d_Qdiag[id] += _r_d0 * x[id]; + _d_x[id] += Qdiag[id] * _r_d0; + } +} +void sqnorm_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x) { + double _d_res = 0; + unsigned long _t0; + int _d_i = 0; + int i = 0; + double res = x[0] * x[0]; + _t0 = 0; + for (i = 1; i < n; i++) { + _t0++; + res = res + x[i] * x[i]; + } + goto _label0; + _label0: + _d_res += _d_y; + for (; _t0; _t0--) { + i--; + double _r_d0 = _d_res; + _d_res -= _r_d0; + _d_res += _r_d0; + _d_x[i] += _r_d0 * x[i]; + _d_x[i] += x[i] * _r_d0; + } + { + _d_x[0] += _d_res * x[0]; + _d_x[0] += x[0] * _d_res; + } +} +void arr_max_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x); +void logsumexp_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x) { + double _d_mx = 0; + double _d_semx = 0; + unsigned long _t0; + int _d_i = 0; + int i = 0; + double mx = arr_max(n, x); + double semx = 0.; + _t0 = 0; + for (i = 0; i < n; i++) { + _t0++; + semx = semx + exp(x[i] - mx); + } + goto _label0; + _label0: + { + double _r2 = 0; + _r2 += _d_y * clad::custom_derivatives::log_pushforward(semx, 1.).pushforward; + _d_semx += _r2; + _d_mx += _d_y; + } + for (; _t0; _t0--) { + i--; + { + double _r_d0 = _d_semx; + _d_semx -= _r_d0; + _d_semx += _r_d0; + double _r1 = 0; + _r1 += _r_d0 * clad::custom_derivatives::exp_pushforward(x[i] - mx, 1.).pushforward; + _d_x[i] += _r1; + _d_mx += -_r1; + } + } + { + int _r0 = 0; + arr_max_pullback(n, x, _d_mx, &_r0, _d_x); + *_d_n += _r0; + } +} +inline void log_gamma_distrib_pullback(double a, double p, double _d_y, double *_d_a, double *_d_p); +void log_wishart_prior_pullback(int p, int k, Wishart wishart, const double *const sum_qs, const double *const Qdiags, const double *const icf, double _d_y, int *_d_p, int *_d_k, Wishart *_d_wishart, double *_d_sum_qs, double *_d_Qdiags, double *_d_icf) { + int _d_n = 0; + int _d_icf_sz = 0; + double _t0; + double _t1; + double _d_C = 0; + double _d_out = 0; + unsigned long _t2; + int _d_ik = 0; + int ik = 0; + clad::tape _t3 = {}; + double _d_frobenius = 0; + double frobenius = 0; + int n = p + wishart.m + 1; + int icf_sz = p * (p + 1) / 2; + _t1 = log(2); + _t0 = (log(wishart.gamma) - 0.5 * _t1); + double C = n * p * _t0 - log_gamma_distrib(0.5 * n, p); + double out = 0; + _t2 = 0; + for (ik = 0; ik < k; ik++) { + _t2++; + clad::push(_t3, frobenius) , frobenius = sqnorm(p, &Qdiags[ik * p]) + sqnorm(icf_sz - p, &icf[ik * icf_sz + p]); + out = out + 0.5 * wishart.gamma * wishart.gamma * (frobenius) - wishart.m * sum_qs[ik]; + } + goto _label0; + _label0: + { + _d_out += _d_y; + *_d_k += -_d_y * C; + _d_C += k * -_d_y; + } + for (; _t2; _t2--) { + ik--; + { + double _r_d0 = _d_out; + _d_out -= _r_d0; + _d_out += _r_d0; + (*_d_wishart).gamma += 0.5 * _r_d0 * (frobenius) * wishart.gamma; + (*_d_wishart).gamma += 0.5 * wishart.gamma * _r_d0 * (frobenius); + _d_frobenius += 0.5 * wishart.gamma * wishart.gamma * _r_d0; + (*_d_wishart).m += -_r_d0 * sum_qs[ik]; + _d_sum_qs[ik] += wishart.m * -_r_d0; + } + { + int _r3 = 0; + sqnorm_pullback(p, &Qdiags[ik * p], _d_frobenius, &_r3, &_d_Qdiags[ik * p]); + *_d_p += _r3; + int _r4 = 0; + sqnorm_pullback(icf_sz - p, &icf[ik * icf_sz + p], _d_frobenius, &_r4, &_d_icf[ik * icf_sz + p]); + _d_icf_sz += _r4; + *_d_p += -_r4; + _d_frobenius = 0; + frobenius = clad::pop(_t3); + } + } + { + _d_n += _d_C * _t0 * p; + *_d_p += n * _d_C * _t0; + double _r0 = 0; + _r0 += n * p * _d_C * clad::custom_derivatives::log_pushforward(wishart.gamma, 1.).pushforward; + (*_d_wishart).gamma += _r0; + double _r1 = 0; + double _r2 = 0; + log_gamma_distrib_pullback(0.5 * n, p, -_d_C, &_r1, &_r2); + _d_n += 0.5 * _r1; + *_d_p += _r2; + } + { + *_d_p += _d_icf_sz / 2 * (p + 1); + *_d_p += p * _d_icf_sz / 2; + } + { + *_d_p += _d_n; + (*_d_wishart).m += _d_n; + } +} +void arr_max_pullback(int n, const double *const x, double _d_y, int *_d_n, double *_d_x) { + double _d_m = 0; + unsigned long _t0; + int _d_i = 0; + int i = 0; + clad::tape _t2 = {}; + double m = x[0]; + _t0 = 0; + for (i = 1; i < n; i++) { + _t0++; + bool _t1 = m < x[i]; + { + if (_t1) + m = x[i]; + clad::push(_t2, _t1); + } + } + goto _label0; + _label0: + _d_m += _d_y; + for (; _t0; _t0--) { + i--; + if (clad::pop(_t2)) { + double _r_d0 = _d_m; + _d_m -= _r_d0; + _d_x[i] += _r_d0; + } + } + _d_x[0] += _d_m; +} +inline void log_gamma_distrib_pullback(double a, double p, double _d_y, double *_d_a, double *_d_p) { + double _t0; + double _d_out = 0; + unsigned long _t1; + int _d_j = 0; + int j = 0; + _t0 = log(3.1415926535900001); + double out = 0.25 * p * (p - 1) * _t0; + _t1 = 0; + for (j = 1; j <= p; j++) { + _t1++; + out = out + lgamma(a + 0.5 * (1 - j)); + } + goto _label0; + _label0: + _d_out += _d_y; + for (; _t1; _t1--) { + j--; + { + double _r_d0 = _d_out; + _d_out -= _r_d0; + _d_out += _r_d0; + double _r0 = 0; + _r0 += _r_d0 * numerical_diff::forward_central_difference(lgamma, a + 0.5 * (1 - j), 0, 0, a + 0.5 * (1 - j)); + *_d_a += _r0; + _d_j += -0.5 * _r0; + } + } + { + *_d_p += 0.25 * _d_out * _t0 * (p - 1); + *_d_p += 0.25 * p * _d_out * _t0; + } +} \ No newline at end of file diff --git a/src/cpp/modules/cladLatestTBR/gmm/gmm_grad_tbr.h b/src/cpp/modules/cladLatestTBR/gmm/gmm_grad_tbr.h new file mode 100644 index 00000000..dceb27a1 --- /dev/null +++ b/src/cpp/modules/cladLatestTBR/gmm/gmm_grad_tbr.h @@ -0,0 +1,2 @@ +#include "gmm.h" +void gmm_objective_pullback(int d, int k, int n, const double *const alphas, const double *const means, const double *const icf, const double *const x, Wishart wishart, double *err, int *_d_d, int *_d_k, int *_d_n, double *_d_alphas, double *_d_means, double *_d_icf, double *_d_x, Wishart *_d_wishart, double *_d_err); \ No newline at end of file