diff --git a/src/15utility.js b/src/15utility.js index 3de55a7161..74e1eb4345 100755 --- a/src/15utility.js +++ b/src/15utility.js @@ -65,6 +65,43 @@ function returnTrue() { */ function returnUndefined() {} +/** + SQL-compliant IN check that handles NULL values according to SQL three-valued logic + @param {*} leftValue - The value to check + @param {Array} arr - The array to check against + @return {boolean} True if leftValue is in arr (excluding NULLs), false otherwise + */ +utils.sqlInCheck = function (leftValue, arr) { + var lv = utils.getValueOf(leftValue); + if (lv == null) return false; + for (var i = 0; i < arr.length; i++) { + var iv = utils.getValueOf(arr[i]); + if (iv != null && iv === lv) return true; + } + return false; +}; + +/** + SQL-compliant NOT IN check that handles NULL values according to SQL three-valued logic + @param {*} leftValue - The value to check + @param {Array} arr - The array to check against + @return {boolean} False if leftValue is NULL, or if arr contains NULL and no match found, true otherwise + */ +utils.sqlNotInCheck = function (leftValue, arr) { + var lv = utils.getValueOf(leftValue); + if (lv == null) return false; + var hasNull = false; + for (var i = 0; i < arr.length; i++) { + var iv = utils.getValueOf(arr[i]); + if (iv == null) { + hasNull = true; + } else if (iv === lv) { + return false; + } + } + return !hasNull; +}; + /** Escape string @function diff --git a/src/50expression.js b/src/50expression.js index 39e2e997dd..b286f9d4c3 100755 --- a/src/50expression.js +++ b/src/50expression.js @@ -371,23 +371,23 @@ // If correlated, we cannot cache the results as they depend on the current row const cacheKey = `in${this.queriesidx}`; const checkCorrelated = `(this.queriesfn[${this.queriesidx}].query && this.queriesfn[${this.queriesidx}].query.isCorrelated)`; - const cachedLookup = `((this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = new Set(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).map(alasql.utils.getValueOf)))).has(alasql.utils.getValueOf(${leftJS()})))`; - const uncachedLookup = `(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).indexOf(alasql.utils.getValueOf(${leftJS()})) > -1)`; + const cachedLookup = `((this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).filter(function(v){return alasql.utils.getValueOf(v)!=null}).map(alasql.utils.getValueOf))).indexOf(alasql.utils.getValueOf(${leftJS()}))>-1&&alasql.utils.getValueOf(${leftJS()})!=null)`; + const uncachedLookup = `alasql.utils.sqlInCheck(${leftJS()},alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})))`; s = `(${checkCorrelated} ? ${uncachedLookup} : ${cachedLookup})`; } else if (Array.isArray(this.right)) { if (!alasql.options.cache || this.right.some(value => value instanceof yy.ParamValue)) { // Leverage JS Set for faster lookups than arrays - s = `(new Set([${this.right.map(ref).join(',')}]).has(alasql.utils.getValueOf(${leftJS()})))`; + s = `alasql.utils.sqlInCheck(${leftJS()},[${this.right.map(ref).join(',')}])`; } else { // Use a cache to avoid re-creating the Set on every identical query alasql.sets = alasql.sets || {}; const allValues = this.right.map(value => value.value); const allValuesStr = allValues.join(','); - alasql.sets[allValuesStr] = alasql.sets[allValuesStr] || new Set(allValues); - s = `alasql.sets["${allValuesStr}"].has(alasql.utils.getValueOf(${leftJS()}))`; + alasql.sets[allValuesStr] = alasql.sets[allValuesStr] || new Set(allValues.filter(v => v != null)); + s = `(alasql.sets["${allValuesStr}"].has(alasql.utils.getValueOf(${leftJS()}))&&alasql.utils.getValueOf(${leftJS()})!=null)`; } } else { - s = `(${rightJS()}.indexOf(${leftJS()}) > -1)`; + s = `alasql.utils.sqlInCheck(${leftJS()},${rightJS()})`; } } else if (this.op === 'NOT IN') { if (this.right instanceof yy.Select) { @@ -395,23 +395,24 @@ // If correlated, we cannot cache the results as they depend on the current row const cacheKey = `notIn${this.queriesidx}`; const checkCorrelated = `(this.queriesfn[${this.queriesidx}].query && this.queriesfn[${this.queriesidx}].query.isCorrelated)`; - const cachedLookup = `(!(this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = new Set(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).map(alasql.utils.getValueOf)))).has(alasql.utils.getValueOf(${leftJS()})))`; - const uncachedLookup = `(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).indexOf(alasql.utils.getValueOf(${leftJS()})) < 0)`; + const cachedLookup = `((this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context}))),alasql.utils.sqlNotInCheck(${leftJS()},this.subqueryCache.${cacheKey})))`; + const uncachedLookup = `alasql.utils.sqlNotInCheck(${leftJS()},alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})))`; s = `(${checkCorrelated} ? ${uncachedLookup} : ${cachedLookup})`; } else if (Array.isArray(this.right)) { if (!alasql.options.cache || this.right.some(value => value instanceof yy.ParamValue)) { // Leverage JS Set for faster lookups than arrays - s = `(!(new Set([${this.right.map(ref).join(',')}]).has(alasql.utils.getValueOf(${leftJS()}))))`; + s = `alasql.utils.sqlNotInCheck(${leftJS()},[${this.right.map(ref).join(',')}])`; } else { // Use a cache to avoid re-creating the Set on every identical query alasql.sets = alasql.sets || {}; const allValues = this.right.map(value => value.value); const allValuesStr = allValues.join(','); - alasql.sets[allValuesStr] = alasql.sets[allValuesStr] || new Set(allValues); - s = `!alasql.sets["${allValuesStr}"].has(alasql.utils.getValueOf(${leftJS()}))`; + const hasNull = allValues.some(v => v == null); + alasql.sets[allValuesStr] = alasql.sets[allValuesStr] || new Set(allValues.filter(v => v != null)); + s = `(${hasNull}?false:(alasql.utils.getValueOf(${leftJS()})!=null&&!alasql.sets["${allValuesStr}"].has(alasql.utils.getValueOf(${leftJS()}))))`; } } else { - s = `(${rightJS()}.indexOf(${leftJS()}) === -1)`; + s = `alasql.utils.sqlNotInCheck(${leftJS()},${rightJS()})`; } } diff --git a/test/test1414.js b/test/test1414.js new file mode 100644 index 0000000000..14620cef0a --- /dev/null +++ b/test/test1414.js @@ -0,0 +1,86 @@ +if (typeof exports === 'object') { + var assert = require('assert'); + var alasql = require('..'); +} + +describe('Test 1414 - NOT IN with NULL values should follow SQL semantics', function () { + const test = '1414'; + + before(function () { + alasql('create database test' + test); + alasql('use test' + test); + }); + + after(function () { + alasql('drop database test' + test); + }); + + it('A) NOT IN with NULL in subquery should return empty result', function () { + alasql('CREATE TABLE R (a number)'); + alasql('CREATE TABLE S (b number)'); + alasql.tables.R.data = [{a: 1}, {a: null}]; + alasql.tables.S.data = [{b: null}]; + var res = alasql('select a from R where a not in (select b from S)'); + // When subquery contains NULL, NOT IN should return empty result + // because comparison with NULL is UNKNOWN, and NOT IN UNKNOWN = UNKNOWN (false in WHERE) + assert.deepEqual(res, []); + }); + + it('B) NOT IN with NULL value on left side', function () { + alasql('DROP TABLE IF EXISTS R'); + alasql('DROP TABLE IF EXISTS S'); + alasql('CREATE TABLE R (a number)'); + alasql('CREATE TABLE S (b number)'); + alasql.tables.R.data = [{a: null}, {a: 2}]; + alasql.tables.S.data = [{b: 1}]; + var res = alasql('select a from R where a not in (select b from S)'); + // NULL NOT IN (1) should evaluate to UNKNOWN (excluded from WHERE result) + // 2 NOT IN (1) should be TRUE (included) + assert.deepEqual(res, [{a: 2}]); + }); + + it('C) NOT IN without NULL should work normally', function () { + alasql('DROP TABLE IF EXISTS R'); + alasql('DROP TABLE IF EXISTS S'); + alasql('CREATE TABLE R (a number)'); + alasql('CREATE TABLE S (b number)'); + alasql.tables.R.data = [{a: 1}, {a: 2}, {a: 3}]; + alasql.tables.S.data = [{b: 2}]; + var res = alasql('select a from R where a not in (select b from S)'); + // 1 NOT IN (2) = TRUE, 2 NOT IN (2) = FALSE, 3 NOT IN (2) = TRUE + assert.deepEqual(res, [{a: 1}, {a: 3}]); + }); + + it('D) NOT IN with multiple values including NULL', function () { + alasql('DROP TABLE IF EXISTS R'); + alasql('DROP TABLE IF EXISTS S'); + alasql('CREATE TABLE R (a number)'); + alasql('CREATE TABLE S (b number)'); + alasql.tables.R.data = [{a: 1}, {a: 2}, {a: 3}]; + alasql.tables.S.data = [{b: 2}, {b: null}]; + var res = alasql('select a from R where a not in (select b from S)'); + // When subquery contains NULL, all comparisons are UNKNOWN + assert.deepEqual(res, []); + }); + + it('E) NOT IN with array literal containing NULL', function () { + alasql('DROP TABLE IF EXISTS R'); + alasql('CREATE TABLE R (a number)'); + alasql.tables.R.data = [{a: 1}, {a: 2}, {a: 3}]; + var res = alasql('select a from R where a not in (2, NULL)'); + // When list contains NULL, all NOT IN comparisons are UNKNOWN + assert.deepEqual(res, []); + }); + + it('F) IN with NULL in subquery', function () { + alasql('DROP TABLE IF EXISTS R'); + alasql('DROP TABLE IF EXISTS S'); + alasql('CREATE TABLE R (a number)'); + alasql('CREATE TABLE S (b number)'); + alasql.tables.R.data = [{a: 1}, {a: 2}]; + alasql.tables.S.data = [{b: 1}, {b: null}]; + var res = alasql('select a from R where a in (select b from S)'); + // 1 IN (1, NULL) = TRUE, 2 IN (1, NULL) = UNKNOWN (excluded) + assert.deepEqual(res, [{a: 1}]); + }); +});