diff --git a/src/join/join.jl b/src/join/join.jl index 8e0ac90a..6d052d23 100644 --- a/src/join/join.jl +++ b/src/join/join.jl @@ -146,6 +146,7 @@ function _fill_val_join!(x, r2, val, inbits, r) end end end + # F1 and F2 are here for type stability when threads = false function _find_ranges_for_join!(ranges, x, y, _fl::F1, _fr::F2, ::Val{T1}, ::Val{T2}; type = :both, threads = true) where {T1, T2, F1, F2} if type == :both @@ -226,12 +227,31 @@ function _find_ranges_for_join_pa!(ranges, x, invpool, y, _fl::F1, _fr::F2, ::Va end -function _fill_oncols_left_table_left!(_res, x, ranges, en, total, fill_val; threads = true) - @_threadsfor threads for i in 1:length(x) - i == 1 ? lo = 1 : lo = en[i - 1] + 1 - hi = en[i] - _fill_val_join!(_res, lo:hi, x[i]) +function _fill_oncols_left_table_left!(_res, x, ranges, en, total, fill_val; inbits = nothing, en2 = nothing, threads = true) + if inbits === nothing + @_threadsfor threads for i in 1:length(x) + i == 1 ? lo = 1 : lo = en[i - 1] + 1 + hi = en[i] + _fill_val_join!(_res, lo:hi, x[i]) + end + else + @_threadsfor threads for i in 1:length(x) + if i == 1 + lo = 1 + lo2 = 1 + else + lo = en[i - 1] + 1 + lo2 = en2[i-1] + 1 + end + hi = en[i] + # @show sum(view(inbits, lo:hi)) + # sum(view(inbits, lo:hi)) == 0 && continue + hi2 = en2[i] + length(ranges[i]) ===0 ? _fill_val_join!(_res, lo2:hi2, x[i]) : _fill_val_join!(_res, lo2:hi2, x[i], inbits, lo:hi) + + end end + @_threadsfor threads for i in en[length(x)]+1:total _res[i] = fill_val end @@ -273,11 +293,26 @@ function _fill_oncols_left_table_anti!(_res, x, ranges, en, total; threads = tru end end -function _fill_right_cols_table_left!(_res, x, ranges, en, total, fill_val; threads = true) - @_threadsfor threads for i in 1:length(ranges) - i == 1 ? lo = 1 : lo = en[i - 1] + 1 - hi = en[i] - length(ranges[i]) == 0 ? _fill_val_join!(_res, lo:hi, fill_val) : copyto!(_res, lo, x, ranges[i].start, length(ranges[i])) +function _fill_right_cols_table_left!(_res, x, ranges, en, total, fill_val; inbits = nothing, en2 = nothing, threads = true) + if inbits === nothing + @_threadsfor threads for i in 1:length(ranges) + i == 1 ? lo = 1 : lo = en[i - 1] + 1 + hi = en[i] + length(ranges[i]) == 0 ? _fill_val_join!(_res, lo:hi, fill_val) : copyto!(_res, lo, x, ranges[i].start, length(ranges[i])) + end + else + @_threadsfor threads for i in 1:length(ranges) + if i == 1 + lo = 1 + lo2 = 1 + else + lo = en[i - 1] + 1 + lo2 = en2[i-1] + 1 + end + hi = en[i] + hi2 = en2[i] + _fill_right_col_range!(_res, lo2:hi2, x, ranges[i], inbits, lo:hi, fill_val) + end end end @@ -294,13 +329,30 @@ function _fill_right_col_range!(_res, r2, x, ranges, inbits, r) end end +function _fill_right_col_range!(_res, r2, x, ranges, inbits, r, fill_val) + cnt = 1 + cnt_r = 1 + lo = r2.start + for i in r + if inbits[i] + _res[lo+cnt-1] = x[ranges[cnt_r]] + cnt += 1 + end + if !inbits[i] && length(ranges) === 0 + _res[cnt+lo-1] = fill_val + end + cnt_r += 1 + end +end + + function _fill_right_cols_table_inner!(_res, x, ranges, en, total; inbits = nothing, en2 = nothing, threads = true) if inbits === nothing @_threadsfor threads for i in 1:length(ranges) length(ranges[i]) == 0 && continue i == 1 ? lo = 1 : lo = en[i - 1] + 1 hi = en[i] - copyto!(_res, lo, x, ranges[i].start, length(ranges[i])) + copyto!(_res, lo, x, ranges[i].start, length(ranges[i])) end else @_threadsfor threads for i in 1:length(ranges) @@ -320,19 +372,42 @@ function _fill_right_cols_table_inner!(_res, x, ranges, en, total; inbits = noth end end -function _create_multiple_match_col_left(ranges, total_length) +function _create_multiple_match_col_left(ranges, en, total_length) res = allocatecol(Bool, total_length) cnt = 0 - for i in 1:length(ranges) - if length(ranges[i]) == 0 - cnt += 1 - res[cnt] = false - else - if length(ranges[i]) == 1 + if en === nothing + for i in 1:length(ranges) + if length(ranges[i]) == 0 cnt += 1 res[cnt] = false else - for j in ranges[i] + if length(ranges[i]) == 1 + cnt += 1 + res[cnt] = false + else + for j in ranges[i] + cnt += 1 + res[cnt] = true + end + end + end + end + else + for i in 1:length(ranges) + if i == 1 + lo = 1 + else + lo = en[i - 1] + 1 + end + hi = en[i] + if length(lo:hi) == 0 + cnt+=1 + res[cnt] = false + elseif length(lo:hi) == 1 + cnt += 1 + res[cnt] = false + else + for j in lo:hi cnt += 1 res[cnt] = true end @@ -341,6 +416,7 @@ function _create_multiple_match_col_left(ranges, total_length) end res end + function _create_multiple_match_col_inner(ranges, en, total_length) res = allocatecol(Bool, total_length) cnt = 0 @@ -381,7 +457,6 @@ function _create_multiple_match_col_inner(ranges, en, total_length) end end end - res end @@ -393,31 +468,58 @@ ISLE(::Missing, y) = false ISLE(::Missing, ::Missing) = false -function _mark_lt_part!(inbits, x_l, x_r, _fl::F1, _fr::F2, ranges, r_perms, en, ::Val{T}; strict = false, threads = true) where {T, F1, F2} +function _mark_lt_part!(inbits, x_l, x_r, _fl::F1, _fr::F2, ranges, r_perms, en, ::Val{T}; strict = false, threads = true, join_type) where {T, F1, F2} revised_ends = zeros(T, length(en)) - @_threadsfor threads for i in 1:length(ranges) - if length(ranges[i]) == 0 - if i !== 1 - revised_ends[i] = 0 + if join_type === :left + @_threadsfor threads for i in 1:length(ranges) + if length(ranges[i]) == 0 + revised_ends[i] = 1 + continue end - continue - end - i == 1 ? lo = 1 : lo = en[i - 1] + 1 - hi = en[i] - total = 0 - cnt = 1 - for j in ranges[i] - if strict - inbits[lo + cnt - 1] = isless(_fl(x_l[i]), _fr(x_r[r_perms[j]])) - else - inbits[lo + cnt - 1] = ISLE(_fl(x_l[i]), _fr(x_r[r_perms[j]])) + i == 1 ? lo = 1 : lo = en[i - 1] + 1 + hi = en[i] + total = 0 + cnt = 1 + for j in ranges[i] + if strict + inbits[lo + cnt - 1] = isless(_fl(x_l[i]), _fr(x_r[r_perms[j]])) + else + inbits[lo + cnt - 1] = ISLE(_fl(x_l[i]), _fr(x_r[r_perms[j]])) + end + total += inbits[lo + cnt - 1] + cnt += 1 + end + revised_ends[i] = total + if total == 0 + ranges[i] = 1:0 + revised_ends[i] += 1 end - total += inbits[lo + cnt - 1] - cnt += 1 end - revised_ends[i] = total - if total == 0 - ranges[i] = 1:0 + else + @_threadsfor threads for i in 1:length(ranges) + if length(ranges[i]) == 0 + if i !== 1 + revised_ends[i] = 0 + end + continue + end + i == 1 ? lo = 1 : lo = en[i - 1] + 1 + hi = en[i] + total = 0 + cnt = 1 + for j in ranges[i] + if strict + inbits[lo + cnt - 1] = isless(_fl(x_l[i]), _fr(x_r[r_perms[j]])) + else + inbits[lo + cnt - 1] = ISLE(_fl(x_l[i]), _fr(x_r[r_perms[j]])) + end + total += inbits[lo + cnt - 1] + cnt += 1 + end + revised_ends[i] = total + if total == 0 + ranges[i] = 1:0 + end end end our_cumsum!(revised_ends) @@ -459,98 +561,220 @@ function _change_refpool_find_range_for_join!(ranges, dsl, dsr, r_perms, oncols_ end end +function _ranges_join(dsl, dsr, ::Val{T}; onleft, onright,onright_range, makeunique = false, mapformats = [true, true], stable = false,onlyreturnrange = false, alg = HeapSort, check = true, accelerate = false, droprangecols = true, strict_inequality = [false, false], method = :sort, threads = true, multiple_match = false, multiple_match_name = :multiple, obs_id = [false, false], obs_id_name = :obs_id, join_type) where T + oncols_left = onleft + oncols_right = onright + type = :both + right_range_cols = Int[] - -function _join_left(dsl, dsr, ::Val{T}; onleft, onright, makeunique = false, mapformats = [true, true], stable = false, alg = HeapSort, check = true, accelerate = false, method = :sort, threads = true, multiple_match::Bool = false, multiple_match_name = :multiple, obs_id = [false, false], obs_id_name = :obs_id) where T - isempty(dsl) && return copy(dsl) - if method == :hash - ranges, a, idx, minval, reps, sz, right_cols = _find_ranges_for_join_using_hash(dsl, dsr, onleft, onright, mapformats, makeunique, Val(T); threads = threads) - elseif method == :sort - oncols_left = onleft - oncols_right = onright + if onright_range !== nothing + left_range_col = oncols_left[end] + right_range_cols = index(dsr)[filter!(!isequal(nothing), collect(onright_range))] + if droprangecols + right_cols = setdiff(1:length(index(dsr)), [oncols_right; right_range_cols]) + else + right_cols = setdiff(1:length(index(dsr)), oncols_right) + end + + oncols_right = [oncols_right; first(right_range_cols)] + if onright_range[1] !== nothing + if strict_inequality[1] + type = :leftstrict + else + type = :left + end + else + if strict_inequality[2] + type = :rightstrict + else + type = :right + end + end + else right_cols = setdiff(1:length(index(dsr)), oncols_right) - if !makeunique && !isempty(intersect(_names(dsl), _names(dsr)[right_cols])) - throw(ArgumentError("duplicate column names, pass `makeunique = true` to make them unique using a suffix automatically." )) + end + + if !makeunique && !isempty(intersect(_names(dsl), _names(dsr)[right_cols])) + throw(ArgumentError("duplicate column names, pass `makeunique = true` to make them unique using a suffix automatically." )) + end + + nsfpaj = true + # if the columns for inequality like join are PA we cannot use the fast path + if type != :both + if any(i-> DataAPI.refpool(_columns(dsr)[i]) !== nothing, right_range_cols) + nsfpaj = false end + end + if method == :hash && (onright_range === nothing || length(onleft) > 1) + if onright_range !== nothing + ranges, a, idx, minval, reps, sz, right_cols_2 = _find_ranges_for_join_using_hash(dsl, dsr, onleft[1:end-1], oncols_right[1:end-1], mapformats, true, Val(T); threads = threads) + filter!(!=(0), reps) + pushfirst!(reps, 1) + our_cumsum!(reps) + pop!(reps) + grng = GIVENRANGE(idx, reps, Int[], length(reps)) + starts, idx, last_valid_range = _sort_for_join_after_hash(dsr, right_range_cols[1], stable, alg, mapformats, nsfpaj, grng; threads = threads) + _change_refpool_find_range_for_join!(ranges, dsl, dsr, idx, oncols_left, oncols_right, mapformats[1], mapformats[2], length(oncols_left); type = type, nsfpaj = nsfpaj, threads = threads) + else + ranges, a, idx, minval, reps, sz, right_cols = _find_ranges_for_join_using_hash(dsl, dsr, onleft, onright, mapformats, makeunique, Val(T); threads = threads) + end + else ranges = Vector{UnitRange{T}}(undef, nrow(dsl)) - if length(oncols_left) == 1 && nrow(dsr)>1 - success, result = _join_left_dict(dsl, dsr, ranges, oncols_left, oncols_right, right_cols, Val(T); makeunique = makeunique, mapformats = mapformats, check = check, threads = threads, multiple_match = multiple_match, multiple_match_name = multiple_match_name, obs_id = obs_id, obs_id_name = obs_id_name) + if length(oncols_left) == 1 && type == :both && nrow(dsr)>1 + if join_type === :left + success, result = _join_left_dict(dsl, dsr, ranges, oncols_left, oncols_right, right_cols, Val(T); makeunique = makeunique, mapformats = mapformats, check = check, threads = threads, multiple_match = multiple_match, multiple_match_name = multiple_match_name, obs_id = obs_id, obs_id_name = obs_id_name) + else + success, result = _join_inner_dict(dsl, dsr, ranges, oncols_left, oncols_right, right_cols, Val(T); makeunique = makeunique, mapformats = mapformats, check = check, threads = threads, multiple_match = multiple_match, multiple_match_name = multiple_match_name, obs_id = obs_id, obs_id_name = obs_id_name) + end if success return result end end - idx, uniquemode = _find_permute_and_fill_range_for_join!(ranges, dsr, dsl, oncols_right, oncols_left, stable, alg, mapformats, accelerate; threads = threads) + idx, uniquemode = _find_permute_and_fill_range_for_join!(ranges, dsr, dsl, oncols_right, oncols_left, stable, alg, mapformats, accelerate && (onright_range == nothing || length(oncols_right)>1); nsfpaj = nsfpaj, threads = threads) - for j in 1:length(oncols_left) - _change_refpool_find_range_for_join!(ranges, dsl, dsr, idx, oncols_left, oncols_right, mapformats[1], mapformats[2], j; threads = threads) + for j in 1:length(oncols_left)-1 + _change_refpool_find_range_for_join!(ranges, dsl, dsr, idx, oncols_left, oncols_right, mapformats[1], mapformats[2], j; nsfpaj = nsfpaj, threads = threads) end + _change_refpool_find_range_for_join!(ranges, dsl, dsr, idx, oncols_left, oncols_right, mapformats[1], mapformats[2], length(oncols_left); type = type, nsfpaj = nsfpaj, threads = threads) + end + + if join_type === :left + new_ends = map(x -> max(length(x),1), ranges) + else + new_ends = map(length,ranges) end - new_ends = map(x -> max(1, length(x)), ranges) our_cumsum!(new_ends) total_length = new_ends[end] + inbits = nothing + revised_ends = nothing + if length(right_range_cols) == 2 + inbits = zeros(Bool, total_length) + # TODO any optimisation is needed for pa? + _fl = identity + _fr = identity + if mapformats[1] + _fl = getformat(dsl, left_range_col) + end + if mapformats[2] + _fr = getformat(dsr, right_range_cols[2]) + end + revised_ends = _mark_lt_part!(inbits, _columns(dsl)[left_range_col], _columns(dsr)[right_range_cols[2]], _fl, _fr, ranges, idx, new_ends, total_length < typemax(Int32) ? Val(Int32) : Val(Int64); strict = strict_inequality[2], threads = threads, join_type) + if join_type === :left + total_length = revised_ends[end] + else + total_length = sum(inbits) + end + end + + if onlyreturnrange + return ranges + end + if check @assert total_length < 10*nrow(dsl) "the output data set will be very large ($(total_length)×$(ncol(dsl)+length(right_cols))) compared to the left data set size ($(nrow(dsl))×$(ncol(dsl))), make sure that the `on` keyword is selected properly, alternatively, pass `check = false` to ignore this error." end if multiple_match - multiple_match_col = _create_multiple_match_col_left(ranges, total_length) + if join_type === :left + multiple_match_col = _create_multiple_match_col_left(ranges,revised_ends, total_length) + else + multiple_match_col = _create_multiple_match_col_inner(ranges, revised_ends, total_length) + end end res = [] for j in 1:length(index(dsl)) - addmissing = false _res = allocatecol(_columns(dsl)[j], total_length, addmissing = false) if DataAPI.refpool(_res) !== nothing - # fill_val = DataAPI.invrefpool(_res)[missing] - _fill_oncols_left_table_left!(_res.refs, DataAPI.refarray(_columns(dsl)[j]), ranges, new_ends, total_length, missing; threads = threads) + if join_type === :left + _fill_oncols_left_table_left!(_res.refs, DataAPI.refarray(_columns(dsl)[j]), ranges, new_ends, total_length, missing; inbits = inbits, en2 = revised_ends, threads = threads) + else + _fill_oncols_left_table_inner!(_res.refs, DataAPI.refarray(_columns(dsl)[j]), ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) + end else - _fill_oncols_left_table_left!(_res, _columns(dsl)[j], ranges, new_ends, total_length, missing; threads = threads) + if join_type === :left + _fill_oncols_left_table_left!(_res, _columns(dsl)[j], ranges, new_ends, total_length, missing; inbits = inbits, en2 = revised_ends, threads = threads) + else + _fill_oncols_left_table_inner!(_res, _columns(dsl)[j], ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) + end end push!(res, _res) - end + if dsl isa SubDataset newds = Dataset(res, copy(index(dsl)), copycols = false) else newds = Dataset(res, Index(copy(index(dsl).lookup), copy(index(dsl).names), copy(index(dsl).format)), copycols = false) end - for j in 1:length(right_cols) - _res = allocatecol(_columns(dsr)[right_cols[j]], total_length) + if join_type === :left + if dsr isa SubDataset + _res = allocatecol(_columns(copy(dsr))[right_cols[j]], total_length, addmissing = true) + else + _res = allocatecol(_columns(dsr)[right_cols[j]], total_length, addmissing = true) + end + else + _res = allocatecol(_columns(dsr)[right_cols[j]], total_length, addmissing = false) + end + if DataAPI.refpool(_res) !== nothing - fill_val = DataAPI.invrefpool(_res)[missing] - _fill_right_cols_table_left!(_res.refs, view(DataAPI.refarray(_columns(dsr)[right_cols[j]]), idx), ranges, new_ends, total_length, fill_val; threads = threads) + if join_type === :left + fill_val = DataAPI.invrefpool(_res)[missing] + _fill_right_cols_table_left!(_res.refs, view(DataAPI.refarray(_columns(dsr)[right_cols[j]]), idx), ranges, new_ends, total_length, fill_val; inbits = inbits, en2 = revised_ends, threads = threads) + else + _fill_right_cols_table_inner!(_res.refs, view(DataAPI.refarray(_columns(dsr)[right_cols[j]]), idx), ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) + end else - _fill_right_cols_table_left!(_res, view(_columns(dsr)[right_cols[j]], idx), ranges, new_ends, total_length, missing; threads = threads) + if join_type === :left + _fill_right_cols_table_left!(_res, view(DataAPI.refarray(_columns(dsr)[right_cols[j]]), idx), ranges, new_ends, total_length, missing; inbits = inbits, en2 = revised_ends, threads = threads) + else + _fill_right_cols_table_inner!(_res, view(DataAPI.refarray(_columns(dsr)[right_cols[j]]), idx), ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) + end end push!(_columns(newds), _res) - new_var_name = make_unique([_names(dsl); _names(dsr)[right_cols[j]]], makeunique = makeunique)[end] push!(index(newds), new_var_name) setformat!(newds, index(newds)[new_var_name], getformat(dsr, _names(dsr)[right_cols[j]])) end + if multiple_match insertcols!(newds, ncol(newds)+1, multiple_match_name => multiple_match_col, unsupported_copy_cols = false, makeunique = makeunique) end + if obs_id[1] obs_id_name1 = Symbol(obs_id_name, "_left") obs_id_left = allocatecol(nrow(dsl) < typemax(Int32) ? Int32 : Int64, total_length) - _fill_oncols_left_table_left!(obs_id_left, 1:nrow(dsl), ranges, new_ends, total_length, missing; threads = threads) + if join_type === :left + _fill_oncols_left_table_left!(obs_id_left, 1:nrow(dsl), ranges, new_ends, total_length, missing; inbits = inbits, en2 = revised_ends, threads = threads) + else + _fill_oncols_left_table_inner!(obs_id_left, 1:nrow(dsl), ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) + end insertcols!(newds, ncol(newds)+1, obs_id_name1 => obs_id_left, unsupported_copy_cols = false, makeunique = makeunique) end if obs_id[2] obs_id_name2 = Symbol(obs_id_name, "_right") obs_id_right = allocatecol(T, total_length) - _fill_right_cols_table_left!(obs_id_right, idx, ranges, new_ends, total_length, missing, threads = threads) + if join_type === :left + _fill_right_cols_table_left!(obs_id_right, idx, ranges, new_ends, total_length, missing; inbits = inbits, en2 = revised_ends, threads = threads) + else + _fill_right_cols_table_inner!(obs_id_right, idx, ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) + end insertcols!(newds, ncol(newds)+1, obs_id_name2 => obs_id_right, unsupported_copy_cols = false, makeunique = makeunique) end newds end + + +function _join_left(dsl, dsr, ::Val{T}; onleft, onright,onright_range, makeunique = false, mapformats = [true, true], stable = false, alg = HeapSort, check = true, accelerate = false, droprangecols = true, strict_inequality = [false, false], onlyreturnrange = false, method = :sort, threads = true, multiple_match = false, multiple_match_name = :multiple, obs_id = [false, false], obs_id_name = :obs_id) where T + isempty(dsl) && return copy(dsl) + _ranges_join(dsl, dsr, nrow(dsr) < typemax(Int32) ? Val(Int32) : Val(Int64), onleft = onleft, onright = onright, onright_range = onright_range, stable = stable, onlyreturnrange = onlyreturnrange, strict_inequality = strict_inequality, makeunique = makeunique, mapformats = mapformats,accelerate = accelerate, check = check, droprangecols = droprangecols, method = method, threads = threads, multiple_match = multiple_match, multiple_match_name = multiple_match_name, obs_id = obs_id, obs_id_name = obs_id_name, join_type=:left) +end + function _join_left!(dsl::Dataset, dsr::AbstractDataset, ::Val{T}; onleft, onright, makeunique = false, mapformats = [true, true], stable = false, alg = HeapSort, check = true, accelerate = false, method = :sort, threads = true, multiple_match = false, multiple_match_name = :multiple, obs_id = [false, false], obs_id_name = :obs_id) where T isempty(dsl) && return dsl if method == :hash @@ -569,11 +793,14 @@ function _join_left!(dsl::Dataset, dsr::AbstractDataset, ::Val{T}; onleft, onrig return result end end + idx, uniquemode = _find_permute_and_fill_range_for_join!(ranges, dsr, dsl, oncols_right, oncols_left, stable, alg, mapformats, accelerate, threads = threads) + for j in 1:length(oncols_left) _change_refpool_find_range_for_join!(ranges, dsl, dsr, idx, oncols_left, oncols_right, mapformats[1], mapformats[2], j, threads = threads) end end + if !all(x->length(x) <= 1, ranges) throw(ArgumentError("`leftjoin!` can only be used when each observation in left data set matches at most one observation from right data set")) end @@ -625,159 +852,7 @@ end function _join_inner(dsl, dsr::AbstractDataset, ::Val{T}; onleft, onright, onright_range = nothing , makeunique = false, mapformats = [true, true], stable = false, alg = HeapSort, check = true, accelerate = false, droprangecols = true, strict_inequality = [false, false], method = :sort, threads = true, onlyreturnrange = false, multiple_match = false, multiple_match_name = :multiple, obs_id = [false, false], obs_id_name = :obs_id) where T (isempty(dsl) || isempty(dsr)) && throw(ArgumentError("in `innerjoin` both left and right tables must be non-empty")) - oncols_left = onleft - oncols_right = onright - type = :both - right_range_cols = Int[] - if onright_range !== nothing - left_range_col = oncols_left[end] - - right_range_cols = index(dsr)[filter!(!isequal(nothing), collect(onright_range))] - if droprangecols - right_cols = setdiff(1:length(index(dsr)), [oncols_right; right_range_cols]) - else - right_cols = setdiff(1:length(index(dsr)), oncols_right) - end - - oncols_right = [oncols_right; first(right_range_cols)] - if onright_range[1] !== nothing - if strict_inequality[1] - type = :leftstrict - else - type = :left - end - else - if strict_inequality[2] - type = :rightstrict - else - type = :right - end - end - else - right_cols = setdiff(1:length(index(dsr)), oncols_right) - end - if !makeunique && !isempty(intersect(_names(dsl), _names(dsr)[right_cols])) - throw(ArgumentError("duplicate column names, pass `makeunique = true` to make them unique using a suffix automatically." )) - end - - nsfpaj = true - # if the columns for inequality like join are PA we cannot use the fast path - if type != :both - if any(i-> DataAPI.refpool(_columns(dsr)[i]) !== nothing, right_range_cols) - nsfpaj = false - end - end - # if (onright_range === nothing || length(onleft) > 1) is false, then we have inequality kind join with no exact match join - if method == :hash && (onright_range === nothing || length(onleft) > 1) - if onright_range !== nothing - ranges, a, idx, minval, reps, sz, right_cols_2 = _find_ranges_for_join_using_hash(dsl, dsr, onleft[1:end-1], oncols_right[1:end-1], mapformats, true, Val(T); threads = threads) - filter!(!=(0), reps) - pushfirst!(reps, 1) - our_cumsum!(reps) - pop!(reps) - grng = GIVENRANGE(idx, reps, Int[], length(reps)) - starts, idx, last_valid_range = _sort_for_join_after_hash(dsr, right_range_cols[1], stable, alg, mapformats, nsfpaj, grng; threads = threads) - _change_refpool_find_range_for_join!(ranges, dsl, dsr, idx, oncols_left, oncols_right, mapformats[1], mapformats[2], length(oncols_left); type = type, nsfpaj = nsfpaj, threads = threads) - else - ranges, a, idx, minval, reps, sz, right_cols = _find_ranges_for_join_using_hash(dsl, dsr, onleft, onright, mapformats, makeunique, Val(T); threads = threads) - end - else - ranges = Vector{UnitRange{T}}(undef, nrow(dsl)) - if length(oncols_left) == 1 && type == :both && nrow(dsr)>1 - success, result = _join_inner_dict(dsl, dsr, ranges, oncols_left, oncols_right, right_cols, Val(T); makeunique = makeunique, mapformats = mapformats, check = check, threads = threads, multiple_match = multiple_match, multiple_match_name = multiple_match_name, obs_id = obs_id, obs_id_name = obs_id_name) - if success - return result - end - end - idx, uniquemode = _find_permute_and_fill_range_for_join!(ranges, dsr, dsl, oncols_right, oncols_left, stable, alg, mapformats, accelerate && (onright_range == nothing || length(oncols_right)>1); nsfpaj = nsfpaj, threads = threads) - - for j in 1:length(oncols_left)-1 - _change_refpool_find_range_for_join!(ranges, dsl, dsr, idx, oncols_left, oncols_right, mapformats[1], mapformats[2], j; nsfpaj = nsfpaj, threads = threads) - end - _change_refpool_find_range_for_join!(ranges, dsl, dsr, idx, oncols_left, oncols_right, mapformats[1], mapformats[2], length(oncols_left); type = type, nsfpaj = nsfpaj, threads = threads) - end - - - - new_ends = map(length, ranges) - our_cumsum!(new_ends) - total_length = new_ends[end] - - inbits = nothing - revised_ends = nothing - if length(right_range_cols) == 2 - inbits = zeros(Bool, total_length) - # TODO any optimisation is needed for pa? - _fl = identity - _fr = identity - if mapformats[1] - _fl = getformat(dsl, left_range_col) - end - if mapformats[2] - _fr = getformat(dsr, right_range_cols[2]) - end - revised_ends = _mark_lt_part!(inbits, _columns(dsl)[left_range_col], _columns(dsr)[right_range_cols[2]], _fl, _fr, ranges, idx, new_ends, total_length < typemax(Int32) ? Val(Int32) : Val(Int64); strict = strict_inequality[2], threads = threads) - end - if length(right_range_cols) == 2 - total_length = sum(inbits) - end - - if onlyreturnrange - return ranges - end - if check - @assert total_length < 10*nrow(dsl) "the output data set will be very large ($(total_length)×$(ncol(dsl)+length(right_cols))) compared to the left data set size ($(nrow(dsl))×$(ncol(dsl))), make sure that the `on` keyword is selected properly, alternatively, pass `check = false` to ignore this error." - end - if multiple_match - multiple_match_col = _create_multiple_match_col_inner(ranges, revised_ends, total_length) - end - - res = [] - for j in 1:length(index(dsl)) - _res = allocatecol(_columns(dsl)[j], total_length, addmissing = false) - if DataAPI.refpool(_res) !== nothing - _fill_oncols_left_table_inner!(_res.refs, DataAPI.refarray(_columns(dsl)[j]), ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) - else - _fill_oncols_left_table_inner!(_res, _columns(dsl)[j], ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) - end - push!(res, _res) - end - if dsl isa SubDataset - newds = Dataset(res, copy(index(dsl)), copycols = false) - else - newds = Dataset(res, Index(copy(index(dsl).lookup), copy(index(dsl).names), copy(index(dsl).format)), copycols = false) - end - - for j in 1:length(right_cols) - _res = allocatecol(_columns(dsr)[right_cols[j]], total_length, addmissing = false) - if DataAPI.refpool(_res) !== nothing - _fill_right_cols_table_inner!(_res.refs, view(DataAPI.refarray(_columns(dsr)[right_cols[j]]), idx), ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) - else - _fill_right_cols_table_inner!(_res, view(_columns(dsr)[right_cols[j]], idx), ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) - end - push!(_columns(newds), _res) - - new_var_name = make_unique([_names(dsl); _names(dsr)[right_cols[j]]], makeunique = makeunique)[end] - push!(index(newds), new_var_name) - setformat!(newds, index(newds)[new_var_name], getformat(dsr, _names(dsr)[right_cols[j]])) - end - if multiple_match - insertcols!(newds, ncol(newds)+1, multiple_match_name => multiple_match_col, unsupported_copy_cols = false, makeunique = makeunique) - end - if obs_id[1] - obs_id_name1 = Symbol(obs_id_name, "_left") - obs_id_left = allocatecol(nrow(dsl) < typemax(Int32) ? Int32 : Int64, total_length) - _fill_oncols_left_table_inner!(obs_id_left, 1:nrow(dsl), ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) - insertcols!(newds, ncol(newds)+1, obs_id_name1 => obs_id_left, unsupported_copy_cols = false, makeunique = makeunique) - end - if obs_id[2] - obs_id_name2 = Symbol(obs_id_name, "_right") - obs_id_right = allocatecol(T, total_length) - _fill_right_cols_table_inner!(obs_id_right, idx, ranges, new_ends, total_length; inbits = inbits, en2 = revised_ends, threads = threads) - insertcols!(newds, ncol(newds)+1, obs_id_name2 => obs_id_right, unsupported_copy_cols = false, makeunique = makeunique) - end - newds - + _ranges_join(dsl, dsr, nrow(dsr) < typemax(Int32) ? Val(Int32) : Val(Int64), onleft = onleft, onright = onright, onright_range = onright_range, stable = stable, onlyreturnrange = onlyreturnrange, strict_inequality = strict_inequality, makeunique = makeunique, mapformats = mapformats,accelerate = accelerate, check = check, droprangecols = droprangecols, method = method, threads = threads, multiple_match = multiple_match, multiple_match_name = multiple_match_name, obs_id = obs_id, obs_id_name = obs_id_name, join_type=:inner) end function _in(dsl::AbstractDataset, dsr::AbstractDataset, ::Val{T}; onleft, onright, mapformats = [true, true], stable = false, alg = HeapSort, accelerate = false, threads = true) where T diff --git a/src/join/main.jl b/src/join/main.jl index d59cb3d2..4c50a689 100644 --- a/src/join/main.jl +++ b/src/join/main.jl @@ -131,7 +131,7 @@ julia> leftjoin(dsl, dsr, on = :year, mapformats = true) # Use formats for datas 4 │ 2012 true missing ``` """ -function DataAPI.leftjoin(dsl::AbstractDataset, dsr::AbstractDataset; on = nothing, makeunique = false, mapformats::Union{Bool, Vector{Bool}} = true, stable = false, alg = HeapSort, check = true, accelerate = false, method::Symbol = :sort, threads::Bool = true, multiple_match::Bool = false, multiple_match_name = :multiple, obs_id::Union{Bool, Vector{Bool}} = false, obs_id_name = :obs_id) +function DataAPI.leftjoin(dsl::AbstractDataset, dsr::AbstractDataset; on = nothing, makeunique = false, mapformats::Union{Bool, Vector{Bool}} = true, stable = false, alg = HeapSort, check = true, accelerate = false, onlyreturnrange = false, droprangecols::Bool = true, strict_inequality = false, method::Symbol = :sort, threads::Bool = true, multiple_match::Bool = false, multiple_match_name = :multiple, obs_id::Union{Bool, Vector{Bool}} = false, obs_id_name = :obs_id) !(method in (:hash, :sort)) && throw(ArgumentError("method must be :hash or :sort")) on === nothing && throw(ArgumentError("`on` keyword must be specified")) if !(on isa AbstractVector) @@ -151,18 +151,30 @@ function DataAPI.leftjoin(dsl::AbstractDataset, dsr::AbstractDataset; on = nothi length(obs_id) !== 2 && throw(ArgumentError("`obs_id` must be a Bool or a vector of Bool with size two")) end + # strict_inequality + if !(strict_inequality isa AbstractVector) + strict_inequality = repeat([strict_inequality], 2) + else + length(strict_inequality) !== 2 && throw(ArgumentError("`strict_inequality` must be a Bool or a vector of Bool with size two")) + end + if typeof(on) <: AbstractVector{<:Union{AbstractString, Symbol}} onleft = multiple_getindex(index(dsl), on) onright = multiple_getindex(index(dsr), on) - + onright_range = nothing elseif (typeof(on) <: AbstractVector{<:Pair{<:ColumnIndex, <:ColumnIndex}}) || (typeof(on) <: AbstractVector{<:Pair{<:AbstractString, <:AbstractString}}) onleft = multiple_getindex(index(dsl), map(x->x.first, on)) onright = multiple_getindex(index(dsr), map(x->x.second, on)) - + onright_range = nothing + elseif (typeof(on) <: AbstractVector{<:Pair{<:ColumnIndex, <:Any}}) || (typeof(on) <: AbstractVector{<:Pair{<:AbstractString, <:Any}}) + onleft = multiple_getindex(index(dsl), map(x->x.first, on)) + onright = multiple_getindex(index(dsr), map(x->x.second, on[1:end-1])) + onright_range = on[end].second + !(onright_range isa Tuple) && throw(ArgumentError("For range join the last element of `on` keyword argument for the right table must be a Tuple of column names")) else throw(ArgumentError("`on` keyword must be a vector of column names or a vector of pairs of column names")) end - _join_left(dsl, dsr, nrow(dsr) < typemax(Int32) ? Val(Int32) : Val(Int64), onleft = onleft, onright = onright, makeunique = makeunique, mapformats = mapformats, stable = stable, alg = alg, check = check, accelerate = accelerate, method = method, threads = threads, multiple_match = multiple_match, multiple_match_name = multiple_match_name, obs_id = obs_id, obs_id_name = obs_id_name) + _join_left(dsl, dsr, nrow(dsr) < typemax(Int32) ? Val(Int32) : Val(Int64), onleft = onleft, onright = onright, onright_range = onright_range, stable = stable, onlyreturnrange = onlyreturnrange, strict_inequality = strict_inequality, makeunique = makeunique, mapformats = mapformats,accelerate = accelerate, check = false,droprangecols = droprangecols, method = method, threads = threads, multiple_match = multiple_match, multiple_match_name = multiple_match_name, obs_id = obs_id, obs_id_name = obs_id_name) end """ @@ -170,7 +182,7 @@ end Variant of `leftjoin` that performs `leftjoin` in place for special case that the number of matching rows from the right data set is at most one. """ -function leftjoin!(dsl::Dataset, dsr::AbstractDataset; on = nothing, makeunique = false, mapformats::Union{Bool, Vector{Bool}} = true, stable = false, alg = HeapSort, accelerate = false, method::Symbol = :sort, threads::Bool = true, multiple_match::Bool=false, multiple_match_name = :multiple, obs_id::Union{Bool, Vector{Bool}} = false, obs_id_name = :obs_id) +function leftjoin!(dsl::Dataset, dsr::AbstractDataset; on = nothing, makeunique = false, mapformats::Union{Bool, Vector{Bool}} = true, stable = false, alg = HeapSort, accelerate = false, strict_inequality = false, method::Symbol = :sort, threads::Bool = true, droprangecols::Bool = true, multiple_match::Bool=false, multiple_match_name = :multiple, obs_id::Union{Bool, Vector{Bool}} = false, obs_id_name = :obs_id) !(method in (:hash, :sort)) && throw(ArgumentError("method must be :hash or :sort")) on === nothing && throw(ArgumentError("`on` keyword must be specified")) if !(on isa AbstractVector) @@ -188,18 +200,30 @@ function leftjoin!(dsl::Dataset, dsr::AbstractDataset; on = nothing, makeunique else length(obs_id) !== 2 && throw(ArgumentError("`obs_id` must be a Bool or a vector of Bool with size two")) end + # strict_inequality + if !(strict_inequality isa AbstractVector) + strict_inequality = repeat([strict_inequality], 2) + else + length(strict_inequality) !== 2 && throw(ArgumentError("`strict_inequality` must be a Bool or a vector of Bool with size two")) + end + if typeof(on) <: AbstractVector{<:Union{AbstractString, Symbol}} onleft = multiple_getindex(index(dsl), on) onright = multiple_getindex(index(dsr), on) - + onright_range = nothing elseif (typeof(on) <: AbstractVector{<:Pair{<:ColumnIndex, <:ColumnIndex}}) || (typeof(on) <: AbstractVector{<:Pair{<:AbstractString, <:AbstractString}}) onleft = multiple_getindex(index(dsl), map(x->x.first, on)) onright = multiple_getindex(index(dsr), map(x->x.second, on)) - + onright_range = nothing + elseif (typeof(on) <: AbstractVector{<:Pair{<:ColumnIndex, <:Any}}) || (typeof(on) <: AbstractVector{<:Pair{<:AbstractString, <:Any}}) + onleft = multiple_getindex(index(dsl), map(x->x.first, on)) + onright = multiple_getindex(index(dsr), map(x->x.second, on[1:end-1])) + onright_range = on[end].second + !(onright_range isa Tuple) && throw(ArgumentError("For range join the last element of `on` keyword argument for the right table must be a Tuple of column names")) else throw(ArgumentError("`on` keyword must be a vector of column names or a vector of pairs of column names")) end - _join_left!(dsl, dsr, nrow(dsr) < typemax(Int32) ? Val(Int32) : Val(Int64), onleft = onleft, onright = onright, makeunique = makeunique, mapformats = mapformats, check = false, method = method, threads = threads, multiple_match = multiple_match, multiple_match_name = multiple_match_name, obs_id = obs_id, obs_id_name = obs_id_name) + _join_left!(dsl, dsr, nrow(dsr) < typemax(Int32) ? Val(Int32) : Val(Int64), onleft = onleft, onright = onright, onright_range = onright_range, stable = stable, strict_inequality = strict_inequality, makeunique = makeunique, mapformats = mapformats,accelerate = accelerate, check = false,droprangecols = droprangecols, method = method, threads = threads, multiple_match = multiple_match, multiple_match_name = multiple_match_name, obs_id = obs_id, obs_id_name = obs_id_name) end """ diff --git a/test/join.jl b/test/join.jl index 443c3879..8bc74ec3 100644 --- a/test/join.jl +++ b/test/join.jl @@ -1903,6 +1903,23 @@ end @test inn_r1_a == inn_r1_t @test inn_r1_v_a == inn_r1_t + left_r1 = leftjoin(store, roster, on = [:date => (:start_date, nothing)], makeunique = true, stable = true) + left_r1_v = leftjoin(store, view(roster, :, :), on = [:date => (:start_date, nothing)], makeunique = true, stable = true) + left_r1_a = leftjoin(store, roster, on = [:date => (:start_date, nothing)], makeunique = true, stable = true, accelerate = true) + left_r1_v_a = leftjoin(store, view(roster, :, :), on = [:date => (:start_date, nothing)], makeunique = true, stable = true, accelerate = true) + + @test left_r1 == leftjoin(store, roster, on = [:date => (:start_date, nothing)], makeunique = true, stable = true, method = :hash) + @test left_r1_v == leftjoin(store, view(roster, :, :), on = [:date => (:start_date, nothing)], makeunique = true, stable = true, method = :hash) + @test left_r1_a == leftjoin(store, roster, on = [:date => (:start_date, nothing)], makeunique = true, stable = true, accelerate = true, method = :hash) + @test left_r1_v_a == leftjoin(store, view(roster, :, :), on = [:date => (:start_date, nothing)], makeunique = true, stable = true, accelerate = true, method = :hash) + + + left_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2020-01-01"), Date("2020-01-01"), Date("2020-01-01"), Date("2020-01-01"), Date("2020-01-01"), Date("2020-01-01"), Date("2020-01-01"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"], Union{Missing, String}["A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B"], Union{Missing, Int64}[1, 5, 2, 6, 3, 7, 4, 8, 1, 5, 2, 6, 3, 7, 4, 8, 1, 5, 2, 6, 1, 5, 2, 6, 3, 7, 4, 8, 1, 5, 1, 5, 2, 6, 1, 5, 2, 6, 3, 7, 4, 8, 1, 5, 2, 6, 3, 7, 4, 8, 1, 5, 2, 6, 3, 7, 1, 5, 2, 6, 3, 7], Union{Missing, Date}[Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-05")]], ["date", "store", "store_1", "employee_ID", "end_date"]) + @test left_r1 == left_r1_t + @test left_r1_v == left_r1_t + @test left_r1_a == left_r1_t + @test left_r1_v_a == left_r1_t + inn_r1 = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, nothing)], stable = true) inn_r1_v = innerjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, nothing)], stable = true) inn_r1_a = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, nothing)], stable = true, accelerate = true) @@ -1918,6 +1935,23 @@ end @test inn_r1_v == inn_r1_t @test inn_r1_a == inn_r1_t @test inn_r1_v_a == inn_r1_t + + left_r1 = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, nothing)], stable = true) + left_r1_v = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, nothing)], stable = true) + left_r1_a = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, nothing)], stable = true, accelerate = true) + left_r1_v_a = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, nothing)], stable = true, accelerate = true) + + @test left_r1 == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, nothing)], stable = true, method = :hash) + @test left_r1_v == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, nothing)], stable = true, method = :hash) + @test left_r1_a == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, nothing)], stable = true, accelerate = true, method = :hash) + @test left_r1_v_a == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, nothing)], stable = true, accelerate = true, method = :hash) + + left_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-02"), Date("2020-01-01"), Date("2020-01-01"), Date("2020-01-01"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "A", "A", "A", "A", "B", "A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "A", "A", "A", "B", "B", "B"], Union{Missing, Int64}[1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7, 8, 5, 6, 7, 8, 1, 2, 3, 5, 6, 7], Union{Missing, Date}[Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-05")]], ["date", "store", "employee_ID", "end_date"]) + @test left_r1 == left_r1_t + @test left_r1_v == left_r1_t + @test left_r1_a == left_r1_t + @test left_r1_v_a == left_r1_t + inn_r1 = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true) inn_r1_v = innerjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, :end_date)], stable = true) @@ -1935,6 +1969,22 @@ end @test inn_r1_a == inn_r1_t @test inn_r1_v_a == inn_r1_t + left_r1 = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true) + left_r1_v = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, :end_date)], stable = true) + left_r1_a = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true, accelerate = true) + left_r1_v_a = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, :end_date)], stable = true, accelerate = true) + + @test left_r1 == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true, method = :hash) + @test left_r1_v == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, :end_date)], stable = true, method = :hash) + @test left_r1_a == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true, accelerate = true, method = :hash) + @test left_r1_v_a == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, :end_date)], stable = true, accelerate = true, method = :hash) + + left_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-02"),Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "A", "A", "A", "A", "B", "B", "A", "B", "A", "A", "B", "B", "B", "B", "B", "B", "A", "A", "A", "B", "B", "B"], Union{Missing, Int64}[3, 4, 1, 2, 3, 4, 5, 6, missing, 5, 1, 2, 7, 8, 5, 6, 7, 8, 1, 2, 3, 5, 6, 7]], ["date", "store", "employee_ID"]) + @test left_r1 == left_r1_t + @test left_r1_v == left_r1_t + @test left_r1_a == left_r1_t + @test left_r1_v_a == left_r1_t + inn_r1 = innerjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true) inn_r1_v = innerjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:end_date, :start_date)], stable = true) inn_r1_a = innerjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, accelerate = true) @@ -1952,6 +2002,22 @@ end @test inn_r1_a == inn_r1_t @test inn_r1_v_a == inn_r1_t + left_r1 = leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true) + left_r1_v = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:end_date, :start_date)], stable = true) + left_r1_a = leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, accelerate = true) + left_r1_v_a = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:end_date, :start_date)], stable = true, accelerate = true) + + @test left_r1 == leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, method = :hash) + @test left_r1_v == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:end_date, :start_date)], stable = true, method = :hash) + @test left_r1_a == leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, accelerate = true, method = :hash) + @test left_r1_v_a == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:end_date, :start_date)], stable = true, accelerate = true, method = :hash) + + left_r1_t = Dataset(date=Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-02"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03")], store=Union{Missing, String}["A", "A", "B", "A", "B", "A", "B", "B", "A", "B"], employee_ID=Union{Missing, Int}[missing,missing,missing,missing,missing,missing,missing,missing,missing,missing]) + @test left_r1 == left_r1_t + @test left_r1_v == left_r1_t + @test left_r1_a == left_r1_t + @test left_r1_v_a == left_r1_t + inn_r1 = innerjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true) inn_r1_a = innerjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, accelerate = true) @@ -1962,6 +2028,17 @@ end @test inn_r1 == inn_r1_t @test inn_r1_a == inn_r1_t + + left_r1 = leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true) + left_r1_a = leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, accelerate = true) + + @test left_r1 == leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, method = :hash) + @test left_r1_a == leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, accelerate = true, method = :hash) + + left_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"),Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-01"),Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"),Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "B", "B", "B", "A", "B", "B", "B", "A", "A", "A","B", "B", "A", "A", "B", "B"], Union{Missing, Int64}[missing,4, 6, 7, 8,missing, 6, 7, 8, 2, 3, 4, missing, 8, 3, 4, 7, 8], Union{Missing, Date}[missing, Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), missing, Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), missing, Date("2019-10-06"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-05"), Date("2019-10-06")]], ["date", "store", "employee_ID", "end_date"]) + @test left_r1 == left_r1_t + @test left_r1_a == left_r1_t + inn_r2 = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = true) inn_r2_a = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = true, accelerate = true) @@ -1972,16 +2049,40 @@ end @test inn_r2 == inn_r2_t @test inn_r2_a == inn_r2_t + left_r2 = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = true) + left_r2_a = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = true, accelerate = true) + + @test left_r2 == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = true, method = :hash) + @test left_r2_a == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = true, accelerate = true, method = :hash) + + left_r2_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-02"),Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "B","A", "B", "A", "B", "B", "A", "A", "B", "B"], Union{Missing, Int64}[4, 3, 5,missing, 5, 1, 8, 7, 1, 2, 5, 6]], ["date", "store", "employee_ID"]) + @test left_r2 == left_r2_t + @test left_r2_a == left_r2_t + inn_r2 = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = true, droprangecols = false) @test inn_r2 == innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = true, droprangecols = false, method = :hash) inn_r2_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "B", "B", "A", "B", "B", "A", "A", "B", "B"], Union{Missing, Int64}[4, 3, 5, 5, 1, 8, 7, 1, 2, 5, 6], Union{Missing, Date}[Date("2019-10-04"), Date("2019-10-03"), Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-09-30"), Date("2019-10-02"), Date("2019-09-30"), Date("2019-10-02")], Union{Missing, Date}[Date("2019-10-06"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-06"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04")]], ["date", "store", "employee_ID", "start_date", "end_date"]) @test inn_r2 == inn_r2_t + + left_r2 = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = true, droprangecols = false) + @test left_r2 == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = true, droprangecols = false, method = :hash) + + left_r2_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-02"),Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "B","A", "B", "A", "B", "B", "A", "A", "B", "B"], Union{Missing, Int64}[4, 3, 5,missing, 5, 1, 8, 7, 1, 2, 5, 6], Union{Missing, Date}[Date("2019-10-04"), Date("2019-10-03"), Date("2019-09-30"),missing, Date("2019-09-30"), Date("2019-09-30"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-09-30"), Date("2019-10-02"), Date("2019-09-30"), Date("2019-10-02")], Union{Missing, Date}[Date("2019-10-06"), Date("2019-10-05"), Date("2019-10-04"),missing, Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-06"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04")]], ["date", "store", "employee_ID", "start_date", "end_date"]) + @test left_r2 == left_r2_t + inn_r2 = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = [true, false], droprangecols = true) @test inn_r2 == innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = [true, false], droprangecols = true, method = :hash) inn_r2_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "A", "A", "A", "B", "B", "A", "B", "B", "B", "B", "B", "A", "A", "B", "B"], Union{Missing, Int64}[3, 4, 1, 2, 3, 5, 5, 1, 7, 8, 5, 6, 7, 1, 2, 5, 6]],["date", "store", "employee_ID"]) @test inn_r2 == inn_r2_t + + left_r2 = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = [true, false], droprangecols = true) + @test left_r2 == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], makeunique = true, stable = true, strict_inequality = [true, false], droprangecols = true, method = :hash) + + left_r2_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-02"),Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "A", "A", "A", "B", "A", "B", "A", "B", "B", "B", "B", "B", "A", "A", "B", "B"], Union{Missing, Int64}[3, 4, 1, 2, 3, 5, missing, 5, 1, 7, 8, 5, 6, 7, 1, 2, 5, 6]],["date", "store", "employee_ID"]) + @test left_r2 == left_r2_t + push!(roster, ["C", 9, Date(2020), Date(2020)]) inn_r1 = innerjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true) @test inn_r1 == innerjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, method = :hash) @@ -1989,6 +2090,12 @@ end inn_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-01"), Date("2019-10-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "B", "B", "B", "B", "B", "B", "A", "A", "A", "B", "A", "A", "B", "B"], Union{Missing, Int64}[4, 6, 7, 8, 6, 7, 8, 2, 3, 4, 8, 3, 4, 7, 8], Union{Missing, Date}[Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-06"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-05"), Date("2019-10-06")]], ["date", "store", "employee_ID", "end_date"]) @test inn_r1 == inn_r1_t + left_r1 = leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true) + @test left_r1 == leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, method = :hash) + + left_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"),Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-01"),Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"),Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "B", "B", "B", "A", "B", "B", "B", "A", "A", "A","B", "B", "A", "A", "B", "B"], Union{Missing, Int64}[missing,4, 6, 7, 8,missing, 6, 7, 8, 2, 3, 4, missing, 8, 3, 4, 7, 8], Union{Missing, Date}[missing, Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), missing, Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-05"), Date("2019-10-06"), missing, Date("2019-10-06"), Date("2019-10-05"), Date("2019-10-06"), Date("2019-10-05"), Date("2019-10-06")]], ["date", "store", "employee_ID", "end_date"]) + @test left_r1 == left_r1_t + roster[4,3] = missing roster[6,4] = missing roster[8,3:4] .= missing @@ -2003,6 +2110,15 @@ end @test inn_r1 == inn_r1_t @test inn_r1_a == inn_r1_t + left_r1 = leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true) + left_r1_a = leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, accelerate = true) + @test left_r1 == leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, method = :hash) + @test left_r1_a == leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, accelerate = true, method = :hash) + + left_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "A", "B", "B", "B", "A", "B", "B", "B", "A", "A", "A", "B", "B", "B", "A", "A", "A", "B", "B"], Union{Missing, Int64}[2, 4, 2, 6, 8, 7, 2, 6, 8, 7, 3, 4, 2, 7, 8, 7, 3, 4, 2, 8, 7], Union{Missing, Date}[Date("2019-10-04"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-06"), missing, Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-06"), missing, missing, Date("2019-10-06"), Date("2019-10-04"), missing, Date("2019-10-06"), missing, missing, Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-06"), missing]], ["date", "store", "employee_ID", "end_date"]) + @test left_r1 == left_r1_t + @test left_r1_a == left_r1_t + inn_r1 = innerjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true) inn_r1_a = innerjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, accelerate = true) @@ -2012,33 +2128,70 @@ end inn_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2020-01-01")], Union{Missing, String}["A", "A", "A"], Union{Missing, Int64}[2, 2, 2]], ["date", "store", "employee_ID"]) @test inn_r1 == inn_r1_t @test inn_r1_a == inn_r1_t + + left_r1 = leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true) + left_r1_a = leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, accelerate = true) + + @test left_r1 == leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, method = :hash) + @test left_r1_a == leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, accelerate = true, method = :hash) + + left_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"),Date("2019-10-02"), Date("2020-01-01"),Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "B", "A", "B", "A", "B", "B", "A", "B"], Union{Missing, Int64}[2, 2,missing, 2, missing, missing, missing, missing, missing, missing]], ["date", "store", "employee_ID"]) + @test left_r1 == left_r1_t + @test left_r1_a == left_r1_t inn_r1 = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true) @test inn_r1 == innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true, method = :hash) - inn_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-02"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "A", "A", "A", "B", "B", "A", "B", "A", "B", "B", "B", "B", "A", "A", "B", "B"], Union{Missing, Int64}[3, 4, 1, 3, 4, 5, 6, 3, 5, 1, 8, 5, 6, 8, 1, 3, 5, 6]], ["date", "store", "employee_ID"]) @test inn_r1 == inn_r1_t + + left_r1 = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true) + @test left_r1 == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true, method = :hash) + left_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-02"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "A", "A", "A", "B", "B", "A", "B", "A", "B", "B", "B", "B", "A", "A", "B", "B"], Union{Missing, Int64}[3, 4, 1, 3, 4, 5, 6, 3, 5, 1, 8, 5, 6, 8, 1, 3, 5, 6]], ["date", "store", "employee_ID"]) + @test left_r1 == left_r1_t + MONTH(x) = month(x) MONTH(::Missing) = missing setformat!(store, 1=>MONTH) setformat!(roster, r"date"=>MONTH) + inn_r3 = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [true, false]) @test inn_r3 == innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [true, false], method = :hash) inn_r3_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "B", "B", "A", "B", "B", "A", "B"], Union{Missing, Int64}[1, 1, 5, 5, 1, 5, 5, 1, 5], Union{Missing, Date}[Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30")], Union{Missing, Date}[Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04")]], ["date", "store", "employee_ID", "start_date", "end_date"]) @test inn_r3 == inn_r3_t + + left_r3 = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [true, false]) + @test left_r3 == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [true, false], method = :hash) + + left_r3_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-02"),Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "B","A", "B", "A", "B", "B", "A", "B"], Union{Missing, Int64}[1, 1, 5,missing, 5, 1, 5, 5, 1, 5], Union{Missing, Date}[Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30"),missing, Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30"), Date("2019-09-30")], Union{Missing, Date}[Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"),missing, Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04")]], ["date", "store", "employee_ID", "start_date", "end_date"]) + @test left_r3 == left_r3_t + inn_r3 = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [false, true]) @test inn_r3 == innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [false, true], method = :hash) inn_r3_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-03")], Union{Missing, String}["A", "A", "A", "A"], Union{Missing, Int64}[3, 3, 3, 3], Union{Missing, Date}[Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, Date}[missing, missing, missing, missing]], ["date", "store", "employee_ID", "start_date", "end_date"]) @test inn_r3 == inn_r3_t + + left_r3 = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [false, true]) + @test left_r3 == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [false, true], method = :hash) + + left_r3_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-02"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A","B","A","B", "A","B","B", "A","B"], Union{Missing, Int64}[3, 3,missing,missing,missing, 3,missing,missing, 3,missing], Union{Missing, Date}[Date("2019-10-03"), Date("2019-10-03"),missing,missing,missing, Date("2019-10-03"),missing,missing, Date("2019-10-03"),missing], Union{Missing, Date}[missing, missing, missing, missing,missing,missing,missing,missing,missing,missing]], ["date", "store", "employee_ID", "start_date", "end_date"]) + @test left_r3 == left_r3_t + inn_r3 = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [true, true]) @test inn_r3 == innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [true, true], method = :hash) inn_r3_t = Dataset([Union{Missing, Date}[], Union{Missing, String}[], Union{Missing, Int64}[], Union{Missing, Date}[], Union{Missing, Date}[]], ["date", "store", "employee_ID", "start_date", "end_date"]) @test inn_r3 == inn_r3_t + left_r3 = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [true, true]) + @test left_r3 == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], droprangecols = false, strict_inequality = [true, true], method = :hash) + + left_r3_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-02"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A","B","A","B", "A","B","B", "A","B"], Union{Missing, Int64}[missing, missing,missing,missing,missing, missing,missing,missing, missing,missing], Union{Missing, Date}[missing, missing,missing,missing,missing, missing,missing,missing, missing,missing], Union{Missing, Date}[missing, missing, missing, missing,missing,missing,missing,missing,missing,missing]], ["date", "store", "employee_ID", "start_date", "end_date"]) + @test left_r3 == left_r3_t + + inn_r1 = innerjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, mapformats = false) inn_r1_v = innerjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (nothing, :start_date)], stable = true, mapformats = false) inn_r1_a = innerjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, mapformats = false, accelerate = true) @@ -2056,6 +2209,25 @@ end @test inn_r1_a == inn_r1_t @test inn_r1_v_a == inn_r1_t + + left_r1 = leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, mapformats = false) + left_r1_v = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (nothing, :start_date)], stable = true, mapformats = false) + left_r1_a = leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, mapformats = false, accelerate = true) + left_r1_v_a = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (nothing, :start_date)], stable = true, mapformats = false, accelerate = true) + + @test left_r1 == leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, mapformats = false, method = :hash) + @test left_r1_v == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (nothing, :start_date)], stable = true, mapformats = false, method = :hash) + @test left_r1_a == leftjoin(store, roster, on = [:store => :store, :date => (nothing, :start_date)], stable = true, mapformats = false, accelerate = true, method = :hash) + @test left_r1_v_a == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (nothing, :start_date)], stable = true, mapformats = false, accelerate = true, method = :hash) + + + left_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "A", "B", "B", "B", "A", "B", "B", "B", "A", "A", "A", "B", "B", "B", "A", "A", "A", "B", "B"], Union{Missing, Int64}[2, 4, 2, 6, 8, 7, 2, 6, 8, 7, 3, 4, 2, 7, 8, 7, 3, 4, 2, 8, 7], Union{Missing, Date}[Date("2019-10-04"), Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-06"), missing, Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-06"), missing, missing, Date("2019-10-06"), Date("2019-10-04"), missing, Date("2019-10-06"), missing, missing, Date("2019-10-06"), Date("2019-10-04"), Date("2019-10-06"), missing]], ["date", "store", "employee_ID", "end_date"]) + @test left_r1 == left_r1_t + @test left_r1_v == left_r1_t + @test left_r1_a == left_r1_t + @test left_r1_v_a == left_r1_t + + inn_r1 = innerjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, mapformats = false) inn_r1_v = innerjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:end_date, :start_date)], stable = true, mapformats = false) inn_r1_a = innerjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, mapformats = false, accelerate = true) @@ -2072,6 +2244,23 @@ end @test inn_r1_a == inn_r1_t @test inn_r1_v_a == inn_r1_t + left_r1 = leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, mapformats = false) + left_r1_v = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:end_date, :start_date)], stable = true, mapformats = false) + left_r1_a = leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, mapformats = false, accelerate = true) + left_r1_v_a = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:end_date, :start_date)], stable = true, mapformats = false, accelerate = true) + + @test left_r1 == leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, mapformats = false, method = :hash) + @test left_r1_v == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:end_date, :start_date)], stable = true, mapformats = false, method = :hash) + @test left_r1_a == leftjoin(store, roster, on = [:store => :store, :date => (:end_date, :start_date)], stable = true, mapformats = false, accelerate = true, method = :hash) + @test left_r1_v_a == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:end_date, :start_date)], stable = true, mapformats = false, accelerate = true, method = :hash) + + + left_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-02"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "B", "A", "B", "A", "B", "B", "A", "B"], Union{Missing, Int64}[2, 2,missing, 2, missing, missing, missing, missing, missing, missing]], ["date", "store", "employee_ID"]) + @test left_r1 == left_r1_t + @test left_r1_v == left_r1_t + @test left_r1_a == left_r1_t + @test left_r1_v_a == left_r1_t + inn_r1 = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true, mapformats = false) inn_r1_v = innerjoin(store, view(roster, :, [1,2, 4,3]), on = [:store => :store, :date => (:start_date, :end_date)], stable = true, mapformats = false) inn_r1_a = innerjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true, mapformats = false, accelerate = true) @@ -2088,6 +2277,22 @@ end @test inn_r1_a == inn_r1_t @test inn_r1_v_a == inn_r1_t + left_r1 = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true, mapformats = false) + left_r1_v = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, :end_date)], stable = true, mapformats = false) + left_r1_a = leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true, mapformats = false, accelerate = true) + left_r1_v_a = leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, :end_date)], stable = true, mapformats = false, accelerate = true) + + @test left_r1 == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true, mapformats = false, method = :hash) + @test left_r1_v == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, :end_date)], stable = true, mapformats = false, method = :hash) + @test left_r1_a == leftjoin(store, roster, on = [:store => :store, :date => (:start_date, :end_date)], stable = true, mapformats = false, accelerate = true, method = :hash) + @test left_r1_v_a == leftjoin(store, view(roster, :, [1,2,4,3]), on = [:store => :store, :date => (:start_date, :end_date)], stable = true, mapformats = false, accelerate = true, method = :hash) + + + left_r1_t = Dataset([Union{Missing, Date}[Date("2019-10-05"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-02"), Date("2019-10-02"), Date("2020-01-01"), Date("2019-10-01"), Date("2019-10-02"), Date("2019-10-05"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-04"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03"), Date("2019-10-03")], Union{Missing, String}["A", "A", "A", "A", "A", "B", "B", "A", "B", "A", "B", "B", "B", "B", "A", "A", "B", "B"], Union{Missing, Int64}[3, 4, 1, 3, 4, 5, 6, 3, 5, 1, 8, 5, 6, 8, 1, 3, 5, 6]], ["date", "store", "employee_ID"]) + @test left_r1 == left_r1_t + @test left_r1_v == left_r1_t + @test left_r1_a == left_r1_t + @test left_r1_v_a == left_r1_t dsl = Dataset(x = [1,2,1,2], y = PooledArray([1.0, 5.0, 2.0, 1.0])) dsr = Dataset(x = [2,1,2], y1 = PooledArray([0, -1,1]), y2 = PooledArray([5,2,2]), z=[111,222,333]) @@ -2101,6 +2306,17 @@ end @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2], y = [1.0,1], y1 = [-1,0], y2 = [2,5], z= [222,111]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + + dsl = Dataset(x = [1,2,1,2], y = ([1.0, 5.0, 2.0, 1.0])) dsr = Dataset(x = [2,1,2], y1 = PooledArray([0, -1,1]), y2 = PooledArray([5,2,2]), z=[111,222,333]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) @@ -2113,6 +2329,16 @@ end @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2], y = [1.0,1], y1 = [-1,0], y2 = [2,5], z= [222,111]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + dsl = Dataset(x = [1,2,1,2], y = PooledArray([1.0, 5.0, 2.0, 1.0])) dsr = Dataset(x = [2,1,2], y1 = ([0, -1,1]), y2 = PooledArray([5,2,2]), z=[111,222,333]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) @@ -2125,6 +2351,16 @@ end @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2], y = [1.0,1], y1 = [-1,0], y2 = [2,5], z= [222,111]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + dsl = Dataset(x = [1,2,1,2], y = PooledArray([1.0, 5.0, 2.0, 1.0])) dsr = Dataset(x = [2,1,2], y1 = ([0, -1,1]), y2 = ([5,2,2]), z=[111,222,333]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) @@ -2137,6 +2373,16 @@ end @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2], y = [1.0,1], y1 = [-1,0], y2 = [2,5], z= [222,111]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + dsl = Dataset(x = [1,2,1,2], y = ([1.0, 5.0, 2.0, 1.0])) dsr = Dataset(x = [2,1,2], y1 = ([0, -1,1]), y2 = ([5,2,2]), z=[111,222,333]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) @@ -2149,6 +2395,16 @@ end @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2], y = [1.0,1], y1 = [-1,0], y2 = [2,5], z= [222,111]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + #views dsl1 = Dataset(x = [1,2,1,2], y = PooledArray([1.0, 5.0, 2.0, 1.0])) @@ -2165,6 +2421,16 @@ end @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true, method = :hash) == Dataset(x = [1,2], y = [1.0,1], y1 = [-1,0], y2 = [2,5], z= [222,111]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false, method = :hash) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + dsl1 = Dataset(x = [1,2,1,2], y = ([1.0, 5.0, 2.0, 1.0])) dsr1 = Dataset(x = [2,1,2], y1 = PooledArray([0, -1,1]), y2 = PooledArray([5,2,2]), z=[111,222,333]) dsl = view(dsl1, [1,2,3,4], [1,2]) @@ -2179,6 +2445,16 @@ end @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true, method = :hash) == Dataset(x = [1,2], y = [1.0,1], y1 = [-1,0], y2 = [2,5], z= [222,111]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false, method = :hash) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + dsl1 = Dataset(x = [1,2,1,2], y = PooledArray([1.0, 5.0, 2.0, 1.0])) dsr1 = Dataset(x = [2,1,2], y1 = ([0, -1,1]), y2 = PooledArray([5,2,2]), z=[111,222,333]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) @@ -2191,6 +2467,16 @@ end @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true, method = :hash) == Dataset(x = [1,2], y = [1.0,1], y1 = [-1,0], y2 = [2,5], z= [222,111]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false, method = :hash) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + dsl1 = Dataset(x = [1,2,1,2], y = PooledArray([1.0, 5.0, 2.0, 1.0])) dsr1 = Dataset(x = [2,1,2], y1 = ([0, -1,1]), y2 = ([5,2,2]), z=[111,222,333]) dsl = view(dsl1, [1,2,3,4], [1,2]) @@ -2205,6 +2491,16 @@ end @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true, method = :hash) == Dataset(x = [1,2], y = [1.0,1], y1 = [-1,0], y2 = [2,5], z= [222,111]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false, method = :hash) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + dsl1 = Dataset(x = [1,2,1,2], y = ([1.0, 5.0, 2.0, 1.0])) dsr1 = Dataset(x = [2,1,2], y1 = ([0, -1,1]), y2 = ([5,2,2]), z=[111,222,333]) dsl = view(dsl1, [1,2,3,4], [1,2]) @@ -2219,6 +2515,16 @@ end @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true, method = :hash) == Dataset(x = [1,2], y = [1.0,1], y1 = [-1,0], y2 = [2,5], z= [222,111]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false, method = :hash) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false) == Dataset(x = [1,2,1,2,2], y = [1.0, 5,2,1,1], y1 = [-1,0,-1,1,0], y2 = [2,5,2,2,5], z= [222,111,222,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2,2], y = [1.0,5,2,1,1], y1 = [-1,missing,missing,1,0], y2 = [2,missing,missing,2,5], z= [222,missing,missing,333,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, :y2)], method = :hash, droprangecols = false, strict_inequality = true) == Dataset(x = [1,2,1,2], y = [1.0,5,2,1], y1 = [-1,missing,missing,0], y2 = [2,missing,missing,5], z= [222,missing,missing,111]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(:y1, nothing)], method = :hash, droprangecols = false) == Dataset(x = [1,2,2,1,2,2], y=[1.0, 5,5,2,1,1], y1 = [-1,0,1,-1,0,1], y2=[2,5,2,2,5,2], z = [222,111,333,222,111,333]) + dsl1 = Dataset(x = [1,2,1,2], y = PooledArray([1.0, 5.0, 2.0, 1.0])) dsr1 = Dataset(x = [2,1,2], y1 = PooledArray([0, -1,1]), y2 = PooledArray([5,2,2]), z=[111,222,333]) @@ -2226,14 +2532,20 @@ end dsr = view(dsr1, [3,1,2,2], [4,1,3,2]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, method = :hash) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, method = :hash) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) + dsr = Dataset(view(dsr1, [3,1,2,2], [4,1,3,2])) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, method = :hash) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, method = :hash) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) dsl = Dataset(view(dsl1, [4,4,4,1,1,2,2], [2,1])) dsr = view(dsr1, [3,1,2,2], [4,1,3,2]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) @test innerjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, method = :hash) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) - + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) + @test leftjoin(dsl, dsr, on = [:x=>:x, :y=>(nothing, :y2)], droprangecols = false, method = :hash) == Dataset(y = [fill(1.0, 10); fill(5,2)], x = [fill(2,6);fill(1,4);fill(2,2)], z = [repeat([333,111], 3); fill(222,4); fill(111,2)], y2 = [2,5,2,5,2,5,2,2,2,2,5,5], y1 = [1,0,1,0,1,0, -1,-1,-1,-1, 0, 0]) dsl = Dataset(rand(1:10, 10, 3), [:x1,:x2, :x3]) dsr = Dataset(rand(1:10, 4,3), [:x1, :x2, :y]) @@ -2262,6 +2574,26 @@ end @test innerjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true, method = :hash) == innerjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true, stable = true) @test innerjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true, method = :hash) == innerjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true, stable = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true) + + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, stable = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true, stable = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true, stable = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true, stable=true) + + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true) + + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, stable = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true, stable = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true, stable = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true, stable = true) + dsl = Dataset(rand(1:10, 10, 3), [:x1,:x2, :x3]) dsr = Dataset(rand(1:10, 4,3), [:x1, :x2, :y]) for i in 1:3 @@ -2294,6 +2626,26 @@ end @test innerjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true, method = :hash) == innerjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true) @test innerjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true, method = :hash) == innerjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true) + + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), dsr, on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true) + + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true) + + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, nothing)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(nothing, :y)], droprangecols = false, makeunique = true) + @test leftjoin(view(dsl, l_ridx, l_cidx), view(dsr, r_ridx, r_cidx), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true, method = :hash) == leftjoin(Dataset(view(dsl, l_ridx, l_cidx)), Dataset(view(dsr, r_ridx, r_cidx)), on = [:x1=>:x1, :x2=>(:x2, :y)], droprangecols = false, makeunique = true, strict_inequality = true) + dsl = Dataset(x1 = [1,2,1,3], y = [-1.2,-3,2.1,-3.5]) dsr = Dataset(x1 = [1,2,3], lower = [0, -3,1], upper = [1,0,2]) @test contains(dsl, dsr, on = [1=>1, 2=>(2,3)]) == [0,1,0,0]