Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions src/15utility.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,43 @@ function returnTrue() {
*/
function returnUndefined() {}

/**
SQL-compliant IN check that handles NULL values according to SQL three-valued logic
@param {*} leftValue - The value to check
@param {Array} arr - The array to check against
@return {boolean} True if leftValue is in arr (excluding NULLs), false otherwise
*/
utils.sqlInCheck = function (leftValue, arr) {
var lv = utils.getValueOf(leftValue);
if (lv == null) return false;
for (var i = 0; i < arr.length; i++) {
var iv = utils.getValueOf(arr[i]);
if (iv != null && iv === lv) return true;
}
return false;
};

/**
SQL-compliant NOT IN check that handles NULL values according to SQL three-valued logic
@param {*} leftValue - The value to check
@param {Array} arr - The array to check against
@return {boolean} False if leftValue is NULL, or if arr contains NULL and no match found, true otherwise
*/
utils.sqlNotInCheck = function (leftValue, arr) {
var lv = utils.getValueOf(leftValue);
if (lv == null) return false;
var hasNull = false;
for (var i = 0; i < arr.length; i++) {
var iv = utils.getValueOf(arr[i]);
if (iv == null) {
hasNull = true;
} else if (iv === lv) {
return false;
}
}
return !hasNull;
};

/**
Escape string
@function
Expand Down
25 changes: 13 additions & 12 deletions src/50expression.js
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like way way too much code to make sure we return something correct for in and not in operatoins. Please rethink how this could be implemented in a much less invasive way.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've simplified the implementation significantly. The new approach:

Changes made (commit 386672b):

  • Added two simple utility functions: sqlInCheck and sqlNotInCheck (37 lines in src/15utility.js)
  • Replaced complex inline arrow functions with calls to these utilities
  • Net reduction of 95 lines of code (added 37, removed 132)

Result:

  • Much cleaner and less invasive
  • Integrates well with existing caching logic
  • All 1896 tests passing
  • Original issue verified fixed

The utilities handle SQL NULL semantics in a centralized, maintainable way rather than duplicating complex logic inline.

Original file line number Diff line number Diff line change
Expand Up @@ -371,47 +371,48 @@
// If correlated, we cannot cache the results as they depend on the current row
const cacheKey = `in${this.queriesidx}`;
const checkCorrelated = `(this.queriesfn[${this.queriesidx}].query && this.queriesfn[${this.queriesidx}].query.isCorrelated)`;
const cachedLookup = `((this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = new Set(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).map(alasql.utils.getValueOf)))).has(alasql.utils.getValueOf(${leftJS()})))`;
const uncachedLookup = `(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).indexOf(alasql.utils.getValueOf(${leftJS()})) > -1)`;
const cachedLookup = `((this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).filter(function(v){return alasql.utils.getValueOf(v)!=null}).map(alasql.utils.getValueOf))).indexOf(alasql.utils.getValueOf(${leftJS()}))>-1&&alasql.utils.getValueOf(${leftJS()})!=null)`;
const uncachedLookup = `alasql.utils.sqlInCheck(${leftJS()},alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})))`;
s = `(${checkCorrelated} ? ${uncachedLookup} : ${cachedLookup})`;
} else if (Array.isArray(this.right)) {
if (!alasql.options.cache || this.right.some(value => value instanceof yy.ParamValue)) {
// Leverage JS Set for faster lookups than arrays
s = `(new Set([${this.right.map(ref).join(',')}]).has(alasql.utils.getValueOf(${leftJS()})))`;
s = `alasql.utils.sqlInCheck(${leftJS()},[${this.right.map(ref).join(',')}])`;
} else {
// Use a cache to avoid re-creating the Set on every identical query
alasql.sets = alasql.sets || {};
const allValues = this.right.map(value => value.value);
const allValuesStr = allValues.join(',');
alasql.sets[allValuesStr] = alasql.sets[allValuesStr] || new Set(allValues);
s = `alasql.sets["${allValuesStr}"].has(alasql.utils.getValueOf(${leftJS()}))`;
alasql.sets[allValuesStr] = alasql.sets[allValuesStr] || new Set(allValues.filter(v => v != null));
s = `(alasql.sets["${allValuesStr}"].has(alasql.utils.getValueOf(${leftJS()}))&&alasql.utils.getValueOf(${leftJS()})!=null)`;
}
} else {
s = `(${rightJS()}.indexOf(${leftJS()}) > -1)`;
s = `alasql.utils.sqlInCheck(${leftJS()},${rightJS()})`;
}
} else if (this.op === 'NOT IN') {
if (this.right instanceof yy.Select) {
// Check if this is a correlated subquery (references outer tables)
// If correlated, we cannot cache the results as they depend on the current row
const cacheKey = `notIn${this.queriesidx}`;
const checkCorrelated = `(this.queriesfn[${this.queriesidx}].query && this.queriesfn[${this.queriesidx}].query.isCorrelated)`;
const cachedLookup = `(!(this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = new Set(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).map(alasql.utils.getValueOf)))).has(alasql.utils.getValueOf(${leftJS()})))`;
const uncachedLookup = `(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).indexOf(alasql.utils.getValueOf(${leftJS()})) < 0)`;
const cachedLookup = `((this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context}))),alasql.utils.sqlNotInCheck(${leftJS()},this.subqueryCache.${cacheKey})))`;
const uncachedLookup = `alasql.utils.sqlNotInCheck(${leftJS()},alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})))`;
s = `(${checkCorrelated} ? ${uncachedLookup} : ${cachedLookup})`;
} else if (Array.isArray(this.right)) {
if (!alasql.options.cache || this.right.some(value => value instanceof yy.ParamValue)) {
// Leverage JS Set for faster lookups than arrays
s = `(!(new Set([${this.right.map(ref).join(',')}]).has(alasql.utils.getValueOf(${leftJS()}))))`;
s = `alasql.utils.sqlNotInCheck(${leftJS()},[${this.right.map(ref).join(',')}])`;
} else {
// Use a cache to avoid re-creating the Set on every identical query
alasql.sets = alasql.sets || {};
const allValues = this.right.map(value => value.value);
const allValuesStr = allValues.join(',');
alasql.sets[allValuesStr] = alasql.sets[allValuesStr] || new Set(allValues);
s = `!alasql.sets["${allValuesStr}"].has(alasql.utils.getValueOf(${leftJS()}))`;
const hasNull = allValues.some(v => v == null);
alasql.sets[allValuesStr] = alasql.sets[allValuesStr] || new Set(allValues.filter(v => v != null));
s = `(${hasNull}?false:(alasql.utils.getValueOf(${leftJS()})!=null&&!alasql.sets["${allValuesStr}"].has(alasql.utils.getValueOf(${leftJS()}))))`;
}
} else {
s = `(${rightJS()}.indexOf(${leftJS()}) === -1)`;
s = `alasql.utils.sqlNotInCheck(${leftJS()},${rightJS()})`;
}
}

Expand Down
86 changes: 86 additions & 0 deletions test/test1414.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
if (typeof exports === 'object') {
var assert = require('assert');
var alasql = require('..');
}

describe('Test 1414 - NOT IN with NULL values should follow SQL semantics', function () {
const test = '1414';

before(function () {
alasql('create database test' + test);
alasql('use test' + test);
});

after(function () {
alasql('drop database test' + test);
});

it('A) NOT IN with NULL in subquery should return empty result', function () {
alasql('CREATE TABLE R (a number)');
alasql('CREATE TABLE S (b number)');
alasql.tables.R.data = [{a: 1}, {a: null}];
alasql.tables.S.data = [{b: null}];
var res = alasql('select a from R where a not in (select b from S)');
// When subquery contains NULL, NOT IN should return empty result
// because comparison with NULL is UNKNOWN, and NOT IN UNKNOWN = UNKNOWN (false in WHERE)
assert.deepEqual(res, []);
});

it('B) NOT IN with NULL value on left side', function () {
alasql('DROP TABLE IF EXISTS R');
alasql('DROP TABLE IF EXISTS S');
alasql('CREATE TABLE R (a number)');
alasql('CREATE TABLE S (b number)');
alasql.tables.R.data = [{a: null}, {a: 2}];
alasql.tables.S.data = [{b: 1}];
var res = alasql('select a from R where a not in (select b from S)');
// NULL NOT IN (1) should evaluate to UNKNOWN (excluded from WHERE result)
// 2 NOT IN (1) should be TRUE (included)
assert.deepEqual(res, [{a: 2}]);
});

it('C) NOT IN without NULL should work normally', function () {
alasql('DROP TABLE IF EXISTS R');
alasql('DROP TABLE IF EXISTS S');
alasql('CREATE TABLE R (a number)');
alasql('CREATE TABLE S (b number)');
alasql.tables.R.data = [{a: 1}, {a: 2}, {a: 3}];
alasql.tables.S.data = [{b: 2}];
var res = alasql('select a from R where a not in (select b from S)');
// 1 NOT IN (2) = TRUE, 2 NOT IN (2) = FALSE, 3 NOT IN (2) = TRUE
assert.deepEqual(res, [{a: 1}, {a: 3}]);
});

it('D) NOT IN with multiple values including NULL', function () {
alasql('DROP TABLE IF EXISTS R');
alasql('DROP TABLE IF EXISTS S');
alasql('CREATE TABLE R (a number)');
alasql('CREATE TABLE S (b number)');
alasql.tables.R.data = [{a: 1}, {a: 2}, {a: 3}];
alasql.tables.S.data = [{b: 2}, {b: null}];
var res = alasql('select a from R where a not in (select b from S)');
// When subquery contains NULL, all comparisons are UNKNOWN
assert.deepEqual(res, []);
});

it('E) NOT IN with array literal containing NULL', function () {
alasql('DROP TABLE IF EXISTS R');
alasql('CREATE TABLE R (a number)');
alasql.tables.R.data = [{a: 1}, {a: 2}, {a: 3}];
var res = alasql('select a from R where a not in (2, NULL)');
// When list contains NULL, all NOT IN comparisons are UNKNOWN
assert.deepEqual(res, []);
});

it('F) IN with NULL in subquery', function () {
alasql('DROP TABLE IF EXISTS R');
alasql('DROP TABLE IF EXISTS S');
alasql('CREATE TABLE R (a number)');
alasql('CREATE TABLE S (b number)');
alasql.tables.R.data = [{a: 1}, {a: 2}];
alasql.tables.S.data = [{b: 1}, {b: null}];
var res = alasql('select a from R where a in (select b from S)');
// 1 IN (1, NULL) = TRUE, 2 IN (1, NULL) = UNKNOWN (excluded)
assert.deepEqual(res, [{a: 1}]);
});
});