Skip to content

feat: add lapack/base/dlaqr5 #7686

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
329 changes: 329 additions & 0 deletions lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,329 @@
/**
* @license Apache-2.0
*
* Copyright (c) 2025 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/* eslint-disable max-len, max-params, max-lines-per-function, max-statements, max-lines, max-depth */

'use strict';

// MODULES //

var floor = require( '@stdlib/math/base/special/floor' );
var abs = require( '@stdlib/math/base/special/abs' );
var dlamch = require( '@stdlib/lapack/base/dlamch' );
var dgemm = require( '@stdlib/blas/base/dgemm' ).ndarray;
var dlacpy = require( '@stdlib/lapack/base/dlacpy' ).ndarray;
var Float64Array = require( '@stdlib/array/float64' );
var dlaset = require( '@stdlib/lapack/base/dlaset' ).ndarray;
var mod = require( '@stdlib/math/base/special/fmod' );
var min = require( '@stdlib/math/base/special/fast/min' );
var max = require( '@stdlib/math/base/special/fast/max' );
var dtrmm = require( './dtrmm.js' );
var dlarfg = require( './dlarfg.js' );
var dlaqr1 = require( './dlaqr1.js' );


// FUNCTIONS //

/**
* Shuffles shifts into pairs of real shifts and pairs of complex conjugate shifts, assuming that complex conjugate shifts are already adjacent to one another.
*
* ## Notes
*
* -   Shifts are visited two at a time. Whenever the two shifts at the current position do not have opposite imaginary parts, the three consecutive shifts starting at that position are rotated one place to the left (in both `SR` and `SI`).
* -   Only the first `nshifts` indexed elements of `SR` and `SI` are read or written. Both arrays are modified in-place.
*
* @private
* @param {integer} nshifts - number of simultaneous shifts, must be even and positive
* @param {Float64Array} SR - real parts of the shifts of origin that define the QR sweep
* @param {integer} strideSR - stride length of `SR`
* @param {NonNegativeInteger} offsetSR - starting index for `SR`
* @param {Float64Array} SI - imaginary parts of the shifts of origin that define the QR sweep
* @param {integer} strideSI - stride length of `SI`
* @param {NonNegativeInteger} offsetSI - starting index of `SI`
* @returns {void}
*/
function shuffleShifts( nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI ) {
	var swap;
	var isi;
	var isr;
	var i;

	isi = offsetSI;
	isr = offsetSR;

	// A rotation touches the shifts at positions `i`, `i+1`, and `i+2`, so the last admissible starting position is `nshifts-3`. Using `i <= nshifts-2` would let the final iteration reach position `nshifts`, which lies outside the list of shifts.
	for ( i = 0; i < nshifts - 2; i += 2 ) {
		if ( SI[ isi ] !== -SI[ isi + strideSI ] ) {
			// Rotate the real parts one place to the left...
			swap = SR[ isr ];
			SR[ isr ] = SR[ isr + strideSR ];
			SR[ isr + strideSR ] = SR[ isr + (2*strideSR) ];
			SR[ isr + (2*strideSR) ] = swap;

			// ...and apply the same rotation to the imaginary parts:
			swap = SI[ isi ];
			SI[ isi ] = SI[ isi + strideSI ];
			SI[ isi + strideSI ] = SI[ isi + (2*strideSI) ];
			SI[ isi + (2*strideSI) ] = swap;
		}
		isi += (2*strideSI);
		isr += (2*strideSR);
	}
}


// MAIN //

/**
* Performs a single small-bulge multi-shift QR sweep.
*
* ## Notes
*
* -   `SR` and `SI` are reordered in-place so that the shifts form pairs of real shifts and pairs of complex conjugate shifts.
* -   Element `(i,j)` of a two-dimensional array `A` is addressed as `offsetA + (i*strideA1) + (j*strideA2)`.
* -   NOTE(review): as written, this routine only generates the special 2-by-2 reflector at the bottom of the active block. The remaining steps of the reference sweep (3-by-3 bulge reflectors, applying reflectors to `H`/`Z`/`U`, deflation checks, and far-from-diagonal updates) do not appear here; confirm whether they are still to be ported.
*
* @private
* @param {boolean} wantT - boolean value indicating whether the quasi triangular Schur factor is being computed
* @param {boolean} wantZ - boolean value indicating whether the orthogonal Schur factor is being computed
* @param {integer} kacc22 - integer value ranging from 0 to 2 (inclusive), specifies the computation mode for far-from-diagonal updates
* @param {integer} N - number of rows/columns in `H`
* @param {integer} KTOP - first row and column of the submatrix of `H` where the QR sweep will be applied, should be zero based
* @param {integer} KBOT - last row and column of the submatrix of `H` where the QR sweep will be applied, should be zero based
* @param {integer} nshifts - number of simultaneous shifts, must be even and positive
* @param {Float64Array} SR - real parts of the shifts of origin that define the QR sweep
* @param {integer} strideSR - stride length of `SR`
* @param {NonNegativeInteger} offsetSR - starting index for `SR`
* @param {Float64Array} SI - imaginary parts of the shifts of origin that define the QR sweep
* @param {integer} strideSI - stride length of `SI`
* @param {NonNegativeInteger} offsetSI - starting index of `SI`
* @param {Float64Array} H - input upper Hessenberg matrix
* @param {integer} strideH1 - stride of the first dimension of `H`
* @param {integer} strideH2 - stride of the second dimension of `H`
* @param {NonNegativeInteger} offsetH - starting index of `H`
* @param {integer} iloZ - starting row from where the transformation must be applied if `wantZ` is true, should be zero based
* @param {integer} ihiZ - ending row from where the transformation must be applied if `wantZ` is true, should be zero based
* @param {Float64Array} Z - the QR sweep orthogonal similarity transformation is accumulated into `Z` between the rows and columns `iloZ` and `ihiZ` if `wantZ` is true, otherwise `Z` is not referenced
* @param {integer} strideZ1 - stride of the first dimension of `Z`
* @param {integer} strideZ2 - stride of the second dimension of `Z`
* @param {NonNegativeInteger} offsetZ - starting index of `Z`
* @param {Float64Array} V - Householder vectors are stored column-wise, used in forming bulges for the multi-shift QR algorithm
* @param {integer} strideV1 - stride of the first dimension of `V`
* @param {integer} strideV2 - stride of the second dimension of `V`
* @param {NonNegativeInteger} offsetV - starting index of `V`
* @param {Float64Array} U - used to hold the product of Householder reflectors that represent accumulated orthogonal transformations from the bulge-chasing process
* @param {integer} strideU1 - stride of the first dimension of `U`
* @param {integer} strideU2 - stride of the second dimension of `U`
* @param {NonNegativeInteger} offsetU - starting index of `U`
* @param {integer} NH - number of columns in `WH` available for workspace
* @param {Float64Array} WH - workspace array
* @param {integer} strideWH1 - stride of the first dimension of `WH`
* @param {integer} strideWH2 - stride of the second dimension of `WH`
* @param {NonNegativeInteger} offsetWH - starting index of `WH`
* @param {integer} NV - number of rows in `WV` available for workspace
* @param {Float64Array} WV - workspace array
* @param {integer} strideWV1 - stride of the first dimension of `WV`
* @param {integer} strideWV2 - stride of the second dimension of `WV`
* @param {NonNegativeInteger} offsetWV - starting index of `WV`
* @returns {void}
*
* @example
* var Float64Array = require( '@stdlib/array/float64' );
*
* var H = new Float64Array( [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] );
* var Z = new Float64Array( [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ] );
* var V = new Float64Array( 6 );
* var U = new Float64Array( 10 );
* var WH = new Float64Array( 16 );
* var WV = new Float64Array( 16 );
* var SR = new Float64Array( [ 1.1, 2.2 ] );
* var SI = new Float64Array( [ 0.0, 0.0 ] );
*
* dlaqr5( true, true, 0, 4, 0, 3, 2, SR, 1, 0, SI, 1, 0, H, 4, 1, 0, 0, 3, Z, 4, 1, 0, V, 2, 1, 0, U, 2, 1, 0, 4, WH, 4, 1, 0, 4, WV, 4, 1, 0 );
* // H => <Float64Array>[ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ]
* // Z => <Float64Array>[ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ]
*/
function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI, H, strideH1, strideH2, offsetH, iloZ, ihiZ, Z, strideZ1, strideZ2, offsetZ, V, strideV1, strideV2, offsetV, U, strideU1, strideU2, offsetU, NH, WH, strideWH1, strideWH2, offsetWH, NV, WV, strideWV1, strideWV2, offsetWV ) {
	// NOTE(review): several of the locals below are declared but never referenced; presumably they are placeholders for the part of the sweep that is not implemented yet — confirm or remove.
	var dlarfgOut;
	var block22;
	var safmin;
	var smlnum;
	var refsum;
	var mstart;
	var incol;
	var accum;
	var nbmps;
	var krcol;
	var bmp22;
	var alpha;
	var ndcol;
	var beta;
	var jcol;
	var jlen;
	var jrow;
	var jtop;
	var jbot;
	var mend;
	var mtop;
	var mbot;
	var tst1;
	var tst2;
	var ulp;
	var h11;
	var h12;
	var h21;
	var h22;
	var kdu;
	var kms;
	var knz;
	var kzs;
	var m22;
	var scl;
	var k1;
	var nu;
	var vt;
	var i2;
	var i4;
	var j2;
	var j4;
	var ns;
	var ih;
	var iv;
	var k;
	var m;
	var j;

	// If there are no shifts, then there is nothing to do.
	if ( nshifts < 2 ) {
		return;
	}

	// If the active block is empty or 1-by-1, then there is nothing to do.
	if ( KTOP >= KBOT ) {
		return;
	}

	// Allocate scratch space only once we know there is work to do:
	dlarfgOut = new Float64Array( 2 ); // Workspace array to pass `alpha` to (and read `beta`/`tau` back from) the `dlarfg` routine
	vt = new Float64Array( 3 ); // local array

	// Shuffle shifts into pairs of real shifts and pairs of complex conjugate shifts, assuming that complex conjugate shifts are already adjacent to one another.
	shuffleShifts( nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI );

	// `nshifts` is supposed to be even, but if it is odd, then simply reduce it by one. The shuffle above ensures that the dropped shift is real and that the remaining shifts are paired.
	ns = nshifts - mod( nshifts, 2 );

	// Machine constants for deflation
	safmin = dlamch( 'safe minimum' );
	ulp = dlamch( 'precision' );
	smlnum = safmin * N / ulp;

	// Use accumulated reflections to update far-from-diagonal entries?
	accum = ( kacc22 === 1 ) || ( kacc22 === 2 );

	// Clear trash
	if ( KTOP + 2 <= KBOT ) {
		ih = offsetH + ( (KTOP+2) * strideH1 ) + ( KTOP * strideH2 );
		H[ ih ] = 0.0;
	}

	// `nbmps` = number of 2-shift bulges in the chain
	nbmps = ns / 2;

	// KDU = width of slab
	kdu = 4 * nbmps;

	// Create and chase chains of `nbmps` bulges
	for ( incol = KTOP - ( 2*nbmps ) + 1; incol <= KBOT - 2; incol += 2 * nbmps ) {
		if ( accum ) {
			jtop = max( KTOP, incol );
		} else if ( wantT ) {
			jtop = 0;
		} else {
			jtop = KTOP;
		}

		ndcol = incol + kdu;
		if ( accum ) {
			dlaset( 'all', kdu, kdu, 0.0, 1.0, U, strideU1, strideU2, offsetU );
		}

		/*
		* Near-the-diagonal bulge chase. The following loop performs the
		* near-the-diagonal part of a small bulge multi-shift QR sweep. Each
		* `6*nbmps-2` column diagonal chunk extends from column `incol` to column
		* `ndcol` (including both column `incol` and column `ndcol`). The following
		* loop chases a 3*`nbmps` column long chain of `nbmps` bulges `3*nbmps-2`
		* columns to the right. (`incol` may be less than `KTOP` and `ndcol`
		* may be greater than `KBOT` indicating phantom columns from which to
		* chase bulges before they are actually introduced or to which to
		* chase bulges beyond column `KBOT`.)
		*/
		for ( krcol = incol; krcol <= min( incol + ( 2*nbmps ) - 1, KBOT - 2 ); krcol += 1 ) {
			/*
			* Bulges number `mtop` to `mbot` are active double implicit shift bulges.
			* There may or may not also be small 2-by-2 bulge, if there is room.
			* The inactive bulges (if any) must wait until the active bulges
			* have moved down the diagonal to make room. The phantom matrix
			* paradigm described above helps keep track.
			*
			* `mtop` is the zero-based index of the first active bulge, while
			* `mbot` is the number of bulges up to and including the last active
			* one (i.e., an exclusive zero-based upper bound). The zero-based index
			* of the candidate 2-by-2 bulge is therefore `mbot` itself; using
			* `mbot+1` here would make the `bmp22` test below unsatisfiable.
			*/
			mtop = max( 0, floor( ( KTOP - krcol ) / 2 ) );
			mbot = min( nbmps, floor( ( KBOT - krcol - 1 ) / 2 ) );
			m22 = mbot;
			bmp22 = ( mbot < nbmps ) && ( krcol + ( 2 * m22 ) === KBOT - 2 );

			/*
			* Generate reflections to chase the chain right one column.
			* (The minimum value of K is KTOP-1.)
			*/
			if ( bmp22 ) {
				// Special case: 2-by-2 reflection at the bottom treated separately
				k = krcol + ( 2 * m22 );

				// Index of the first element in column `m22` of `V`:
				iv = offsetV + ( m22 * strideV2 );

				if ( k === KTOP - 1 ) {
					// Introduce a new 2-by-2 bulge from the shift pair assigned to bulge `m22`...
					dlaqr1( 2, H, strideH1, strideH2, offsetH + ((k+1)*strideH1) + ((k+1)*strideH2), SR[ offsetSR + ( 2 * m22 * strideSR ) ], SI[ offsetSI + ( 2 * m22 * strideSI ) ], SR[ offsetSR + ( ((2*m22) + 1) * strideSR ) ], SI[ offsetSI + ( ((2*m22) + 1) * strideSI ) ], V, strideV1, iv );

					beta = V[ iv ];

					dlarfgOut[ 0 ] = beta;
					dlarfgOut[ 1 ] = 0.0;

					// The reflector's trailing part starts at the second element of column `m22` of `V`:
					dlarfg( 2, V, strideV1, iv + strideV1, dlarfgOut, 1, 0 );

					// Store the reflector's scalar factor in the first element of the column:
					V[ iv ] = dlarfgOut[ 1 ];
				} else {
					// Chase the existing 2-by-2 bulge using the entries of column `k` of `H` below the diagonal...
					ih = offsetH + ( (k+1) * strideH1 ) + ( k * strideH2 );
					beta = H[ ih ];
					V[ iv + strideV1 ] = H[ ih + strideH1 ];

					dlarfgOut[ 0 ] = beta;
					dlarfgOut[ 1 ] = 1.0;

					dlarfg( 2, V, strideV1, iv + strideV1, dlarfgOut, 1, 0 );

					// Write the results back, as is done in the reference routine: the scalar factor goes to `V`, the new subdiagonal entry to `H`, and the annihilated entry is zeroed.
					V[ iv ] = dlarfgOut[ 1 ];
					H[ ih ] = dlarfgOut[ 0 ];
					H[ ih + strideH1 ] = 0.0;
				}
			}
		}
	}
}


// EXPORTS //

module.exports = dlaqr5;
Loading
Loading